WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

Rev	Author	Line No.	Line
5564	serge	1	/**************************************************************************
		2	*
		3	* Copyright 2009 VMware, Inc.
		4	* All Rights Reserved.
		5	*
		6	* Permission is hereby granted, free of charge, to any person obtaining a
		7	* copy of this software and associated documentation files (the
		8	* "Software"), to deal in the Software without restriction, including
		9	* without limitation the rights to use, copy, modify, merge, publish,
		10	* distribute, sub license, and/or sell copies of the Software, and to
		11	* permit persons to whom the Software is furnished to do so, subject to
		12	* the following conditions:
		13	*
		14	* The above copyright notice and this permission notice (including the
		15	* next paragraph) shall be included in all copies or substantial portions
		16	* of the Software.
		17	*
		18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
		19	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		20	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
		21	* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
		22	* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
		23	* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
		24	* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		25	*
		26	**************************************************************************/
		27
		28	/**
		29	* @file
		30	* Texture sampling -- SoA.
		31	*
		32	* @author Jose Fonseca
		33	* @author Brian Paul
		34	*/
		35
		36	#include "pipe/p_defines.h"
		37	#include "pipe/p_state.h"
		38	#include "pipe/p_shader_tokens.h"
		39	#include "util/u_debug.h"
		40	#include "util/u_dump.h"
		41	#include "util/u_memory.h"
		42	#include "util/u_math.h"
		43	#include "util/u_format.h"
		44	#include "util/u_cpu_detect.h"
		45	#include "util/u_format_rgb9e5.h"
		46	#include "lp_bld_debug.h"
		47	#include "lp_bld_type.h"
		48	#include "lp_bld_const.h"
		49	#include "lp_bld_conv.h"
		50	#include "lp_bld_arit.h"
		51	#include "lp_bld_bitarit.h"
		52	#include "lp_bld_logic.h"
		53	#include "lp_bld_printf.h"
		54	#include "lp_bld_swizzle.h"
		55	#include "lp_bld_flow.h"
		56	#include "lp_bld_gather.h"
		57	#include "lp_bld_format.h"
		58	#include "lp_bld_sample.h"
		59	#include "lp_bld_sample_aos.h"
		60	#include "lp_bld_struct.h"
		61	#include "lp_bld_quad.h"
		62	#include "lp_bld_pack.h"
		63
		64
		65	/**
		66	* Generate code to fetch a texel from a texture at int coords (x, y, z).
		67	* The computation depends on whether the texture is 1D, 2D or 3D.
		68	* The result, texel, will be float vectors:
		69	* texel[0] = red values
		70	* texel[1] = green values
		71	* texel[2] = blue values
		72	* texel[3] = alpha values
		73	*/
		74	static void
		75	lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
		76	LLVMValueRef width,
		77	LLVMValueRef height,
		78	LLVMValueRef depth,
		79	LLVMValueRef x,
		80	LLVMValueRef y,
		81	LLVMValueRef z,
		82	LLVMValueRef y_stride,
		83	LLVMValueRef z_stride,
		84	LLVMValueRef data_ptr,
		85	LLVMValueRef mipoffsets,
		86	LLVMValueRef texel_out[4])
		87	{
		88	const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
		89	const unsigned dims = bld->dims;
		90	struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
		91	LLVMBuilderRef builder = bld->gallivm->builder;
		92	LLVMValueRef offset;
		93	LLVMValueRef i, j;
		94	LLVMValueRef use_border = NULL;
		95
		96	/* use_border = x < 0 \|\| x >= width \|\| y < 0 \|\| y >= height */
		97	if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
		98	static_state->min_img_filter,
		99	static_state->mag_img_filter)) {
		100	LLVMValueRef b1, b2;
		101	b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
		102	b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
		103	use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
		104	}
		105
		106	if (dims >= 2 &&
		107	lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
		108	static_state->min_img_filter,
		109	static_state->mag_img_filter)) {
		110	LLVMValueRef b1, b2;
		111	b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
		112	b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
		113	if (use_border) {
		114	use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
		115	use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
		116	}
		117	else {
		118	use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
		119	}
		120	}
		121
		122	if (dims == 3 &&
		123	lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
		124	static_state->min_img_filter,
		125	static_state->mag_img_filter)) {
		126	LLVMValueRef b1, b2;
		127	b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
		128	b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
		129	if (use_border) {
		130	use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
		131	use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
		132	}
		133	else {
		134	use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
		135	}
		136	}
		137
		138	/* convert x,y,z coords to linear offset from start of texture, in bytes */
		139	lp_build_sample_offset(&bld->int_coord_bld,
		140	bld->format_desc,
		141	x, y, z, y_stride, z_stride,
		142	&offset, &i, &j);
		143	if (mipoffsets) {
		144	offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
		145	}
		146
		147	if (use_border) {
		148	/* If we can sample the border color, it means that texcoords may
		149	* lie outside the bounds of the texture image. We need to do
		150	* something to prevent reading out of bounds and causing a segfault.
		151	*
		152	* Simply AND the texture coords with !use_border. This will cause
		153	* coords which are out of bounds to become zero. Zero's guaranteed
		154	* to be inside the texture image.
		155	*/
		156	offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
		157	}
		158
		159	lp_build_fetch_rgba_soa(bld->gallivm,
		160	bld->format_desc,
		161	bld->texel_type,
		162	data_ptr, offset,
		163	i, j,
		164	texel_out);
		165
		166	/*
		167	* Note: if we find an app which frequently samples the texture border
		168	* we might want to implement a true conditional here to avoid sampling
		169	* the texture whenever possible (since that's quite a bit of code).
		170	* Ex:
		171	* if (use_border) {
		172	* texel = border_color;
		173	* }
		174	* else {
		175	* texel = sample_texture(coord);
		176	* }
		177	* As it is now, we always sample the texture, then selectively replace
		178	* the texel color results with the border color.
		179	*/
		180
		181	if (use_border) {
		182	/* select texel color or border color depending on use_border. */
		183	const struct util_format_description *format_desc = bld->format_desc;
		184	int chan;
		185	struct lp_type border_type = bld->texel_type;
		186	border_type.length = 4;
		187	/*
		188	* Only replace channels which are actually present. The others should
		189	* get optimized away eventually by sampler_view swizzle anyway but it's
		190	* easier too.
		191	*/
		192	for (chan = 0; chan < 4; chan++) {
		193	unsigned chan_s;
		194	/* reverse-map channel... */
		195	for (chan_s = 0; chan_s < 4; chan_s++) {
		196	if (chan_s == format_desc->swizzle[chan]) {
		197	break;
		198	}
		199	}
		200	if (chan_s <= 3) {
		201	/* use the already clamped color */
		202	LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
		203	LLVMValueRef border_chan;
		204
		205	border_chan = lp_build_extract_broadcast(bld->gallivm,
		206	border_type,
		207	bld->texel_type,
		208	bld->border_color_clamped,
		209	idx);
		210	texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
		211	border_chan, texel_out[chan]);
		212	}
		213	}
		214	}
		215	}
		216
		217
		218	/**
		219	* Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
		220	*/
		221	static LLVMValueRef
		222	lp_build_coord_mirror(struct lp_build_sample_context *bld,
		223	LLVMValueRef coord)
		224	{
		225	struct lp_build_context *coord_bld = &bld->coord_bld;
		226	struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
		227	LLVMValueRef fract, flr, isOdd;
		228
		229	lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
		230
		231	/* isOdd = flr & 1 */
		232	isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
		233
		234	/* make coord positive or negative depending on isOdd */
		235	coord = lp_build_set_sign(coord_bld, fract, isOdd);
		236
		237	/* convert isOdd to float */
		238	isOdd = lp_build_int_to_float(coord_bld, isOdd);
		239
		240	/* add isOdd to coord */
		241	coord = lp_build_add(coord_bld, coord, isOdd);
		242
		243	return coord;
		244	}
		245
		246
		247	/**
		248	* Helper to compute the first coord and the weight for
		249	* linear wrap repeat npot textures
		250	*/
		251	void
		252	lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
		253	LLVMValueRef coord_f,
		254	LLVMValueRef length_i,
		255	LLVMValueRef length_f,
		256	LLVMValueRef *coord0_i,
		257	LLVMValueRef *weight_f)
		258	{
		259	struct lp_build_context *coord_bld = &bld->coord_bld;
		260	struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
		261	LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
		262	LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
		263	int_coord_bld->one);
		264	LLVMValueRef mask;
		265	/* wrap with normalized floats is just fract */
		266	coord_f = lp_build_fract(coord_bld, coord_f);
		267	/* mul by size and subtract 0.5 */
		268	coord_f = lp_build_mul(coord_bld, coord_f, length_f);
		269	coord_f = lp_build_sub(coord_bld, coord_f, half);
		270	/*
		271	* we avoided the 0.5/length division before the repeat wrap,
		272	* now need to fix up edge cases with selects
		273	*/
		274	/* convert to int, compute lerp weight */
		275	lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
		276	mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
		277	PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
		278	coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, coord0_i);
		279	}
		280
		281
		282	/**
		283	* Build LLVM code for texture wrap mode for linear filtering.
		284	* \param x0_out returns first integer texcoord
		285	* \param x1_out returns second integer texcoord
		286	* \param weight_out returns linear interpolation weight
		287	*/
		288	static void
		289	lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
		290	LLVMValueRef coord,
		291	LLVMValueRef length,
		292	LLVMValueRef length_f,
		293	LLVMValueRef offset,
		294	boolean is_pot,
		295	unsigned wrap_mode,
		296	LLVMValueRef *x0_out,
		297	LLVMValueRef *x1_out,
		298	LLVMValueRef *weight_out)
		299	{
		300	struct lp_build_context *coord_bld = &bld->coord_bld;
		301	struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
		302	LLVMBuilderRef builder = bld->gallivm->builder;
		303	LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
		304	LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
		305	LLVMValueRef coord0, coord1, weight;
		306
		307	switch(wrap_mode) {
		308	case PIPE_TEX_WRAP_REPEAT:
		309	if (is_pot) {
		310	/* mul by size and subtract 0.5 */
		311	coord = lp_build_mul(coord_bld, coord, length_f);
		312	coord = lp_build_sub(coord_bld, coord, half);
		313	if (offset) {
		314	offset = lp_build_int_to_float(coord_bld, offset);
		315	coord = lp_build_add(coord_bld, coord, offset);
		316	}
		317	/* convert to int, compute lerp weight */
		318	lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
		319	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		320	/* repeat wrap */
		321	coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
		322	coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
		323	}
		324	else {
		325	LLVMValueRef mask;
		326	if (offset) {
		327	offset = lp_build_int_to_float(coord_bld, offset);
		328	offset = lp_build_div(coord_bld, offset, length_f);
		329	coord = lp_build_add(coord_bld, coord, offset);
		330	}
		331	lp_build_coord_repeat_npot_linear(bld, coord,
		332	length, length_f,
		333	&coord0, &weight);
		334	mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
		335	PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
		336	coord1 = LLVMBuildAnd(builder,
		337	lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
		338	mask, "");
		339	}
		340	break;
		341
		342	case PIPE_TEX_WRAP_CLAMP:
		343	if (bld->static_sampler_state->normalized_coords) {
		344	/* scale coord to length */
		345	coord = lp_build_mul(coord_bld, coord, length_f);
		346	}
		347	if (offset) {
		348	offset = lp_build_int_to_float(coord_bld, offset);
		349	coord = lp_build_add(coord_bld, coord, offset);
		350	}
		351
		352	/* clamp to [0, length] */
		353	coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
		354
		355	coord = lp_build_sub(coord_bld, coord, half);
		356
		357	/* convert to int, compute lerp weight */
		358	lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
		359	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		360	break;
		361
		362	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		363	{
		364	struct lp_build_context abs_coord_bld = bld->coord_bld;
		365	abs_coord_bld.type.sign = FALSE;
		366
		367	if (bld->static_sampler_state->normalized_coords) {
		368	/* mul by tex size */
		369	coord = lp_build_mul(coord_bld, coord, length_f);
		370	}
		371	if (offset) {
		372	offset = lp_build_int_to_float(coord_bld, offset);
		373	coord = lp_build_add(coord_bld, coord, offset);
		374	}
		375
		376	/* clamp to length max */
		377	coord = lp_build_min(coord_bld, coord, length_f);
		378	/* subtract 0.5 */
		379	coord = lp_build_sub(coord_bld, coord, half);
		380	/* clamp to [0, length - 0.5] */
		381	coord = lp_build_max(coord_bld, coord, coord_bld->zero);
		382	/* convert to int, compute lerp weight */
		383	lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
		384	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		385	/* coord1 = min(coord1, length-1) */
		386	coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
		387	break;
		388	}
		389
		390	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		391	if (bld->static_sampler_state->normalized_coords) {
		392	/* scale coord to length */
		393	coord = lp_build_mul(coord_bld, coord, length_f);
		394	}
		395	if (offset) {
		396	offset = lp_build_int_to_float(coord_bld, offset);
		397	coord = lp_build_add(coord_bld, coord, offset);
		398	}
		399	/* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
		400	/* can skip clamp (though might not work for very large coord values */
		401	coord = lp_build_sub(coord_bld, coord, half);
		402	/* convert to int, compute lerp weight */
		403	lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
		404	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		405	break;
		406
		407	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		408	/* compute mirror function */
		409	coord = lp_build_coord_mirror(bld, coord);
		410
		411	/* scale coord to length */
		412	coord = lp_build_mul(coord_bld, coord, length_f);
		413	coord = lp_build_sub(coord_bld, coord, half);
		414	if (offset) {
		415	offset = lp_build_int_to_float(coord_bld, offset);
		416	coord = lp_build_add(coord_bld, coord, offset);
		417	}
		418
		419	/* convert to int, compute lerp weight */
		420	lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
		421	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		422
		423	/* coord0 = max(coord0, 0) */
		424	coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
		425	/* coord1 = min(coord1, length-1) */
		426	coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
		427	break;
		428
		429	case PIPE_TEX_WRAP_MIRROR_CLAMP:
		430	if (bld->static_sampler_state->normalized_coords) {
		431	/* scale coord to length */
		432	coord = lp_build_mul(coord_bld, coord, length_f);
		433	}
		434	if (offset) {
		435	offset = lp_build_int_to_float(coord_bld, offset);
		436	coord = lp_build_add(coord_bld, coord, offset);
		437	}
		438	coord = lp_build_abs(coord_bld, coord);
		439
		440	/* clamp to [0, length] */
		441	coord = lp_build_min(coord_bld, coord, length_f);
		442
		443	coord = lp_build_sub(coord_bld, coord, half);
		444
		445	/* convert to int, compute lerp weight */
		446	lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
		447	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		448	break;
		449
		450	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
		451	{
		452	struct lp_build_context abs_coord_bld = bld->coord_bld;
		453	abs_coord_bld.type.sign = FALSE;
		454
		455	if (bld->static_sampler_state->normalized_coords) {
		456	/* scale coord to length */
		457	coord = lp_build_mul(coord_bld, coord, length_f);
		458	}
		459	if (offset) {
		460	offset = lp_build_int_to_float(coord_bld, offset);
		461	coord = lp_build_add(coord_bld, coord, offset);
		462	}
		463	coord = lp_build_abs(coord_bld, coord);
		464
		465	/* clamp to length max */
		466	coord = lp_build_min(coord_bld, coord, length_f);
		467	/* subtract 0.5 */
		468	coord = lp_build_sub(coord_bld, coord, half);
		469	/* clamp to [0, length - 0.5] */
		470	coord = lp_build_max(coord_bld, coord, coord_bld->zero);
		471
		472	/* convert to int, compute lerp weight */
		473	lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
		474	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		475	/* coord1 = min(coord1, length-1) */
		476	coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
		477	}
		478	break;
		479
		480	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
		481	{
		482	if (bld->static_sampler_state->normalized_coords) {
		483	/* scale coord to length */
		484	coord = lp_build_mul(coord_bld, coord, length_f);
		485	}
		486	if (offset) {
		487	offset = lp_build_int_to_float(coord_bld, offset);
		488	coord = lp_build_add(coord_bld, coord, offset);
		489	}
		490	coord = lp_build_abs(coord_bld, coord);
		491
		492	/* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
		493	/* skip clamp - always positive, and other side
		494	only potentially matters for very large coords */
		495	coord = lp_build_sub(coord_bld, coord, half);
		496
		497	/* convert to int, compute lerp weight */
		498	lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
		499	coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
		500	}
		501	break;
		502
		503	default:
		504	assert(0);
		505	coord0 = NULL;
		506	coord1 = NULL;
		507	weight = NULL;
		508	}
		509
		510	*x0_out = coord0;
		511	*x1_out = coord1;
		512	*weight_out = weight;
		513	}
		514
		515
		516	/**
		517	* Build LLVM code for texture wrap mode for nearest filtering.
		518	* \param coord the incoming texcoord (nominally in [0,1])
		519	* \param length the texture size along one dimension, as int vector
		520	* \param length_f the texture size along one dimension, as float vector
		521	* \param offset texel offset along one dimension (as int vector)
		522	* \param is_pot if TRUE, length is a power of two
		523	* \param wrap_mode one of PIPE_TEX_WRAP_x
		524	*/
		525	static LLVMValueRef
		526	lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
		527	LLVMValueRef coord,
		528	LLVMValueRef length,
		529	LLVMValueRef length_f,
		530	LLVMValueRef offset,
		531	boolean is_pot,
		532	unsigned wrap_mode)
		533	{
		534	struct lp_build_context *coord_bld = &bld->coord_bld;
		535	struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
		536	LLVMBuilderRef builder = bld->gallivm->builder;
		537	LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
		538	LLVMValueRef icoord;
		539
		540	switch(wrap_mode) {
		541	case PIPE_TEX_WRAP_REPEAT:
		542	if (is_pot) {
		543	coord = lp_build_mul(coord_bld, coord, length_f);
		544	icoord = lp_build_ifloor(coord_bld, coord);
		545	if (offset) {
		546	icoord = lp_build_add(int_coord_bld, icoord, offset);
		547	}
		548	icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
		549	}
		550	else {
		551	if (offset) {
		552	offset = lp_build_int_to_float(coord_bld, offset);
		553	offset = lp_build_div(coord_bld, offset, length_f);
		554	coord = lp_build_add(coord_bld, coord, offset);
		555	}
		556	/* take fraction, unnormalize */
		557	coord = lp_build_fract_safe(coord_bld, coord);
		558	coord = lp_build_mul(coord_bld, coord, length_f);
		559	icoord = lp_build_itrunc(coord_bld, coord);
		560	}
		561	break;
		562
		563	case PIPE_TEX_WRAP_CLAMP:
		564	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		565	if (bld->static_sampler_state->normalized_coords) {
		566	/* scale coord to length */
		567	coord = lp_build_mul(coord_bld, coord, length_f);
		568	}
		569
		570	/* floor */
		571	/* use itrunc instead since we clamp to 0 anyway */
		572	icoord = lp_build_itrunc(coord_bld, coord);
		573	if (offset) {
		574	icoord = lp_build_add(int_coord_bld, icoord, offset);
		575	}
		576
		577	/* clamp to [0, length - 1]. */
		578	icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
		579	length_minus_one);
		580	break;
		581
		582	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		583	if (bld->static_sampler_state->normalized_coords) {
		584	/* scale coord to length */
		585	coord = lp_build_mul(coord_bld, coord, length_f);
		586	}
		587	/* no clamp necessary, border masking will handle this */
		588	icoord = lp_build_ifloor(coord_bld, coord);
		589	if (offset) {
		590	icoord = lp_build_add(int_coord_bld, icoord, offset);
		591	}
		592	break;
		593
		594	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		595	if (offset) {
		596	offset = lp_build_int_to_float(coord_bld, offset);
		597	offset = lp_build_div(coord_bld, offset, length_f);
		598	coord = lp_build_add(coord_bld, coord, offset);
		599	}
		600	/* compute mirror function */
		601	coord = lp_build_coord_mirror(bld, coord);
		602
		603	/* scale coord to length */
		604	assert(bld->static_sampler_state->normalized_coords);
		605	coord = lp_build_mul(coord_bld, coord, length_f);
		606
		607	/* itrunc == ifloor here */
		608	icoord = lp_build_itrunc(coord_bld, coord);
		609
		610	/* clamp to [0, length - 1] */
		611	icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
		612	break;
		613
		614	case PIPE_TEX_WRAP_MIRROR_CLAMP:
		615	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
		616	if (bld->static_sampler_state->normalized_coords) {
		617	/* scale coord to length */
		618	coord = lp_build_mul(coord_bld, coord, length_f);
		619	}
		620	if (offset) {
		621	offset = lp_build_int_to_float(coord_bld, offset);
		622	coord = lp_build_add(coord_bld, coord, offset);
		623	}
		624	coord = lp_build_abs(coord_bld, coord);
		625
		626	/* itrunc == ifloor here */
		627	icoord = lp_build_itrunc(coord_bld, coord);
		628
		629	/* clamp to [0, length - 1] */
		630	icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
		631	break;
		632
		633	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
		634	if (bld->static_sampler_state->normalized_coords) {
		635	/* scale coord to length */
		636	coord = lp_build_mul(coord_bld, coord, length_f);
		637	}
		638	if (offset) {
		639	offset = lp_build_int_to_float(coord_bld, offset);
		640	coord = lp_build_add(coord_bld, coord, offset);
		641	}
		642	coord = lp_build_abs(coord_bld, coord);
		643
		644	/* itrunc == ifloor here */
		645	icoord = lp_build_itrunc(coord_bld, coord);
		646	break;
		647
		648	default:
		649	assert(0);
		650	icoord = NULL;
		651	}
		652
		653	return icoord;
		654	}
		655
		656
		657	/**
		658	* Do shadow test/comparison.
		659	* \param p shadow ref value
		660	* \param texel the texel to compare against
		661	*/
		662	static LLVMValueRef
		663	lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
		664	LLVMValueRef p,
		665	LLVMValueRef texel)
		666	{
		667	struct lp_build_context *texel_bld = &bld->texel_bld;
		668	LLVMValueRef res;
		669
		670	if (0) {
		671	//lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
		672	lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
		673	}
		674
		675	/* result = (p FUNC texel) ? 1 : 0 */
		676	/*
		677	* honor d3d10 floating point rules here, which state that comparisons
		678	* are ordered except NOT_EQUAL which is unordered.
		679	*/
		680	if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
		681	res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
		682	p, texel);
		683	}
		684	else {
		685	res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
		686	p, texel);
		687	}
		688	return res;
		689	}
		690
		691
		692	/**
		693	* Generate code to sample a mipmap level with nearest filtering.
		694	* If sampling a cube texture, r = cube face in [0,5].
		695	*/
		696	static void
		697	lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
		698	LLVMValueRef size,
		699	LLVMValueRef row_stride_vec,
		700	LLVMValueRef img_stride_vec,
		701	LLVMValueRef data_ptr,
		702	LLVMValueRef mipoffsets,
		703	LLVMValueRef *coords,
		704	const LLVMValueRef *offsets,
		705	LLVMValueRef colors_out[4])
		706	{
		707	const unsigned dims = bld->dims;
		708	LLVMValueRef width_vec;
		709	LLVMValueRef height_vec;
		710	LLVMValueRef depth_vec;
		711	LLVMValueRef flt_size;
		712	LLVMValueRef flt_width_vec;
		713	LLVMValueRef flt_height_vec;
		714	LLVMValueRef flt_depth_vec;
		715	LLVMValueRef x, y = NULL, z = NULL;
		716
		717	lp_build_extract_image_sizes(bld,
		718	&bld->int_size_bld,
		719	bld->int_coord_type,
		720	size,
		721	&width_vec, &height_vec, &depth_vec);
		722
		723	flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
		724
		725	lp_build_extract_image_sizes(bld,
		726	&bld->float_size_bld,
		727	bld->coord_type,
		728	flt_size,
		729	&flt_width_vec, &flt_height_vec, &flt_depth_vec);
		730
		731	/*
		732	* Compute integer texcoords.
		733	*/
		734	x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
		735	flt_width_vec, offsets[0],
		736	bld->static_texture_state->pot_width,
		737	bld->static_sampler_state->wrap_s);
		738	lp_build_name(x, "tex.x.wrapped");
		739
		740	if (dims >= 2) {
		741	y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
		742	flt_height_vec, offsets[1],
		743	bld->static_texture_state->pot_height,
		744	bld->static_sampler_state->wrap_t);
		745	lp_build_name(y, "tex.y.wrapped");
		746
		747	if (dims == 3) {
		748	z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
		749	flt_depth_vec, offsets[2],
		750	bld->static_texture_state->pot_depth,
		751	bld->static_sampler_state->wrap_r);
		752	lp_build_name(z, "tex.z.wrapped");
		753	}
		754	}
		755	if (has_layer_coord(bld->static_texture_state->target)) {
		756	if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
		757	/* add cube layer to face */
		758	z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
		759	}
		760	else {
		761	z = coords[2];
		762	}
		763	lp_build_name(z, "tex.z.layer");
		764	}
		765
		766	/*
		767	* Get texture colors.
		768	*/
		769	lp_build_sample_texel_soa(bld,
		770	width_vec, height_vec, depth_vec,
		771	x, y, z,
		772	row_stride_vec, img_stride_vec,
		773	data_ptr, mipoffsets, colors_out);
		774
		775	if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
		776	LLVMValueRef cmpval;
		777	cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
		778	/* this is really just a AND 1.0, cmpval but llvm is clever enough */
		779	colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
		780	bld->texel_bld.one, bld->texel_bld.zero);
		781	colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
		782	}
		783
		784	}
		785
		786
		787	/**
		788	* Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
		789	*/
		790	static LLVMValueRef
		791	lp_build_masklerp(struct lp_build_context *bld,
		792	LLVMValueRef weight,
		793	LLVMValueRef mask0,
		794	LLVMValueRef mask1)
		795	{
		796	struct gallivm_state *gallivm = bld->gallivm;
		797	LLVMBuilderRef builder = gallivm->builder;
		798	LLVMValueRef weight2;
		799
		800	weight2 = lp_build_sub(bld, bld->one, weight);
		801	weight = LLVMBuildBitCast(builder, weight,
		802	lp_build_int_vec_type(gallivm, bld->type), "");
		803	weight2 = LLVMBuildBitCast(builder, weight2,
		804	lp_build_int_vec_type(gallivm, bld->type), "");
		805	weight = LLVMBuildAnd(builder, weight, mask1, "");
		806	weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
		807	weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
		808	weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
		809	return lp_build_add(bld, weight, weight2);
		810	}
		811
		812	/**
		813	* Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
		814	*/
		815	static LLVMValueRef
		816	lp_build_masklerp2d(struct lp_build_context *bld,
		817	LLVMValueRef weight0,
		818	LLVMValueRef weight1,
		819	LLVMValueRef mask00,
		820	LLVMValueRef mask01,
		821	LLVMValueRef mask10,
		822	LLVMValueRef mask11)
		823	{
		824	LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
		825	LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
		826	return lp_build_lerp(bld, weight1, val0, val1, 0);
		827	}
		828
		829	/*
		830	* Compile-time switch read by lp_build_sample_image_linear(): the accurate
		831	* cube corner handling is only considered for seamless cube filtering that is not a gather, and 0 here disables it.
		832	* This is a bit excessive code for something OpenGL just recommends but does not require. */
		833	#define ACCURATE_CUBE_CORNERS 1
		834
		835	/**
		836	* Generate code to sample a mipmap level with linear filtering.
		837	* If sampling a cube texture, r = cube face in [0,5].
		838	* If linear_mask is present, only pixels having their mask set
		839	* will receive linear filtering, the rest will use nearest.
		840	*/
		841	static void
		842	lp_build_sample_image_linear(struct lp_build_sample_context *bld,
		843	boolean is_gather,
		844	LLVMValueRef size,
		845	LLVMValueRef linear_mask,
		846	LLVMValueRef row_stride_vec,
		847	LLVMValueRef img_stride_vec,
		848	LLVMValueRef data_ptr,
		849	LLVMValueRef mipoffsets,
		850	LLVMValueRef *coords,
		851	const LLVMValueRef *offsets,
		852	LLVMValueRef colors_out[4])
		853	{
		854	LLVMBuilderRef builder = bld->gallivm->builder;
		855	struct lp_build_context *ivec_bld = &bld->int_coord_bld;
		856	struct lp_build_context *coord_bld = &bld->coord_bld;
		857	struct lp_build_context *texel_bld = &bld->texel_bld;
		858	const unsigned dims = bld->dims;
		859	LLVMValueRef width_vec;
		860	LLVMValueRef height_vec;
		861	LLVMValueRef depth_vec;
		862	LLVMValueRef flt_size;
		863	LLVMValueRef flt_width_vec;
		864	LLVMValueRef flt_height_vec;
		865	LLVMValueRef flt_depth_vec;
		866	LLVMValueRef fall_off[4], have_corners;
		867	LLVMValueRef z1 = NULL;
		868	LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
		869	LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
		870	LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
		871	LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
		872	LLVMValueRef xs[4], ys[4], zs[4];
		873	LLVMValueRef neighbors[2][2][4];
		874	int chan, texel_index;
		875	boolean seamless_cube_filter, accurate_cube_corners;
		876
		877	seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE \|\|
		878	bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
		879	bld->static_sampler_state->seamless_cube_map;
		880	/*
		881	* XXX I don't know how this is really supposed to work with gather. From GL
		882	* spec wording (not gather specific) it sounds like the 4th missing texel
		883	* should be an average of the other 3, hence for gather could return this.
		884	* This is however NOT how the code here works, which just fixes up the
		885	* weights used for filtering instead. And of course for gather there is
		886	* no filter to tweak...
		887	*/
		888	accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
		889	!is_gather;
		890
		891	lp_build_extract_image_sizes(bld,
		892	&bld->int_size_bld,
		893	bld->int_coord_type,
		894	size,
		895	&width_vec, &height_vec, &depth_vec);
		896
		897	flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
		898
		899	lp_build_extract_image_sizes(bld,
		900	&bld->float_size_bld,
		901	bld->coord_type,
		902	flt_size,
		903	&flt_width_vec, &flt_height_vec, &flt_depth_vec);
		904
		905	/*
		906	* Compute integer texcoords.
		907	*/
		908
		909	if (!seamless_cube_filter) {
		910	lp_build_sample_wrap_linear(bld, coords[0], width_vec,
		911	flt_width_vec, offsets[0],
		912	bld->static_texture_state->pot_width,
		913	bld->static_sampler_state->wrap_s,
		914	&x00, &x01, &s_fpart);
		915	lp_build_name(x00, "tex.x0.wrapped");
		916	lp_build_name(x01, "tex.x1.wrapped");
		917	x10 = x00;
		918	x11 = x01;
		919
		920	if (dims >= 2) {
		921	lp_build_sample_wrap_linear(bld, coords[1], height_vec,
		922	flt_height_vec, offsets[1],
		923	bld->static_texture_state->pot_height,
		924	bld->static_sampler_state->wrap_t,
		925	&y00, &y10, &t_fpart);
		926	lp_build_name(y00, "tex.y0.wrapped");
		927	lp_build_name(y10, "tex.y1.wrapped");
		928	y01 = y00;
		929	y11 = y10;
		930
		931	if (dims == 3) {
		932	lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
		933	flt_depth_vec, offsets[2],
		934	bld->static_texture_state->pot_depth,
		935	bld->static_sampler_state->wrap_r,
		936	&z00, &z1, &r_fpart);
		937	z01 = z10 = z11 = z00;
		938	lp_build_name(z00, "tex.z0.wrapped");
		939	lp_build_name(z1, "tex.z1.wrapped");
		940	}
		941	}
		942	if (has_layer_coord(bld->static_texture_state->target)) {
		943	if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
		944	/* add cube layer to face */
		945	z00 = z01 = z10 = z11 = z1 =
		946	lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
		947	}
		948	else {
		949	z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
		950	}
		951	lp_build_name(z00, "tex.z0.layer");
		952	lp_build_name(z1, "tex.z1.layer");
		953	}
		954	}
		955	else {
		956	struct lp_build_if_state edge_if;
		957	LLVMTypeRef int1t;
		958	LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
		959	LLVMValueRef coord, have_edge, have_corner;
		960	LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
		961	LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
		962	LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
		963	LLVMValueRef face = coords[2];
		964	LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
		965	LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
		966	/* XXX drop height calcs. Could (should) do this without seamless filtering too */
		967	height_vec = width_vec;
		968	flt_height_vec = flt_width_vec;
		969
		970	/* XXX the overflow logic is actually sort of duplicated with trilinear,
		971	* since an overflow in one mip should also have a corresponding overflow
		972	* in another.
		973	*/
		974	/* should always have normalized coords, and offsets are undefined */
		975	assert(bld->static_sampler_state->normalized_coords);
		976	coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
		977	/* instead of clamp, build mask if overflowed */
		978	coord = lp_build_sub(coord_bld, coord, half);
		979	/* convert to int, compute lerp weight */
		980	/* not ideal with AVX (and no AVX2) */
		981	lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
		982	x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
		983	coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
		984	coord = lp_build_sub(coord_bld, coord, half);
		985	lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
		986	y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
		987
		988	fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
		989	fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
		990	fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
		991	fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
		992
		993	fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
		994	fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
		995	have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
		996	have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
		997
		998	/* needed for accurate corner filtering branch later, rely on 0 init */
		999	int1t = LLVMInt1TypeInContext(bld->gallivm->context);
		1000	have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
		1001
		1002	for (texel_index = 0; texel_index < 4; texel_index++) {
		1003	xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
		1004	ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
		1005	zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
		1006	}
		1007
		1008	lp_build_if(&edge_if, bld->gallivm, have_edge);
		1009
		1010	have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
		1011	have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
		1012	LLVMBuildStore(builder, have_corner, have_corners);
		1013
		1014	/*
		1015	* Need to feed clamped values here for cheap corner handling,
		1016	* but only for y coord (as when falling off both edges we only
		1017	* fall off the x one) - this should be sufficient.
		1018	*/
		1019	y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
		1020	y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
		1021
		1022	/*
		1023	* Get all possible new coords.
		1024	*/
		1025	lp_build_cube_new_coords(ivec_bld, face,
		1026	x0, x1, y0_clamped, y1_clamped,
		1027	length_minus_one,
		1028	new_faces, new_xcoords, new_ycoords);
		1029
		1030	/* handle fall off x-, x+ direction */
		1031	/* determine new coords, face (not both fall_off vars can be true at same time) */
		1032	x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
		1033	y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
		1034	x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
		1035	y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
		1036	x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
		1037	y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
		1038	x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
		1039	y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
		1040
		1041	z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
		1042	z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
		1043
		1044	/* handle fall off y-, y+ direction */
		1045	/*
		1046	* Cheap corner logic: just hack up things so a texel doesn't fall
		1047	* off both sides (which means filter weights will be wrong but we'll only
		1048	* use valid texels in the filter).
		1049	* This means however (y) coords must additionally be clamped (see above).
		1050	* This corner handling should be fully OpenGL (but not d3d10) compliant.
		1051	*/
		1052	fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
		1053	fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
		1054	fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
		1055	fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
		1056
		1057	x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
		1058	y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
		1059	x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
		1060	y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
		1061	x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
		1062	y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
		1063	x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
		1064	y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
		1065
		1066	z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
		1067	z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
		1068	z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
		1069	z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
		1070
		1071	if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
		1072	/* now can add cube layer to face (per sample) */
		1073	z00 = lp_build_add(ivec_bld, z00, coords[3]);
		1074	z01 = lp_build_add(ivec_bld, z01, coords[3]);
		1075	z10 = lp_build_add(ivec_bld, z10, coords[3]);
		1076	z11 = lp_build_add(ivec_bld, z11, coords[3]);
		1077	}
		1078
		1079	LLVMBuildStore(builder, x00, xs[0]);
		1080	LLVMBuildStore(builder, x01, xs[1]);
		1081	LLVMBuildStore(builder, x10, xs[2]);
		1082	LLVMBuildStore(builder, x11, xs[3]);
		1083	LLVMBuildStore(builder, y00, ys[0]);
		1084	LLVMBuildStore(builder, y01, ys[1]);
		1085	LLVMBuildStore(builder, y10, ys[2]);
		1086	LLVMBuildStore(builder, y11, ys[3]);
		1087	LLVMBuildStore(builder, z00, zs[0]);
		1088	LLVMBuildStore(builder, z01, zs[1]);
		1089	LLVMBuildStore(builder, z10, zs[2]);
		1090	LLVMBuildStore(builder, z11, zs[3]);
		1091
		1092	lp_build_else(&edge_if);
		1093
		1094	LLVMBuildStore(builder, x0, xs[0]);
		1095	LLVMBuildStore(builder, x1, xs[1]);
		1096	LLVMBuildStore(builder, x0, xs[2]);
		1097	LLVMBuildStore(builder, x1, xs[3]);
		1098	LLVMBuildStore(builder, y0, ys[0]);
		1099	LLVMBuildStore(builder, y0, ys[1]);
		1100	LLVMBuildStore(builder, y1, ys[2]);
		1101	LLVMBuildStore(builder, y1, ys[3]);
		1102	if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
		1103	LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
		1104	LLVMBuildStore(builder, cube_layer, zs[0]);
		1105	LLVMBuildStore(builder, cube_layer, zs[1]);
		1106	LLVMBuildStore(builder, cube_layer, zs[2]);
		1107	LLVMBuildStore(builder, cube_layer, zs[3]);
		1108	}
		1109	else {
		1110	LLVMBuildStore(builder, face, zs[0]);
		1111	LLVMBuildStore(builder, face, zs[1]);
		1112	LLVMBuildStore(builder, face, zs[2]);
		1113	LLVMBuildStore(builder, face, zs[3]);
		1114	}
		1115
		1116	lp_build_endif(&edge_if);
		1117
		1118	x00 = LLVMBuildLoad(builder, xs[0], "");
		1119	x01 = LLVMBuildLoad(builder, xs[1], "");
		1120	x10 = LLVMBuildLoad(builder, xs[2], "");
		1121	x11 = LLVMBuildLoad(builder, xs[3], "");
		1122	y00 = LLVMBuildLoad(builder, ys[0], "");
		1123	y01 = LLVMBuildLoad(builder, ys[1], "");
		1124	y10 = LLVMBuildLoad(builder, ys[2], "");
		1125	y11 = LLVMBuildLoad(builder, ys[3], "");
		1126	z00 = LLVMBuildLoad(builder, zs[0], "");
		1127	z01 = LLVMBuildLoad(builder, zs[1], "");
		1128	z10 = LLVMBuildLoad(builder, zs[2], "");
		1129	z11 = LLVMBuildLoad(builder, zs[3], "");
		1130	}
		1131
		1132	if (linear_mask) {
		1133	/*
		1134	* Whack filter weights into place. Whatever texel had more weight is
		1135	* the one which should have been selected by nearest filtering hence
		1136	* just use 100% weight for it.
		1137	*/
		1138	struct lp_build_context *c_bld = &bld->coord_bld;
		1139	LLVMValueRef w1_mask, w1_weight;
		1140	LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
		1141
		1142	w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
		1143	/* this select is really just a "and" */
		1144	w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
		1145	s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
		1146	if (dims >= 2) {
		1147	w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
		1148	w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
		1149	t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
		1150	if (dims == 3) {
		1151	w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
		1152	w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
		1153	r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
		1154	}
		1155	}
		1156	}
		1157
		1158	/*
		1159	* Get texture colors.
		1160	*/
		1161	/* get x0/x1 texels */
		1162	lp_build_sample_texel_soa(bld,
		1163	width_vec, height_vec, depth_vec,
		1164	x00, y00, z00,
		1165	row_stride_vec, img_stride_vec,
		1166	data_ptr, mipoffsets, neighbors[0][0]);
		1167	lp_build_sample_texel_soa(bld,
		1168	width_vec, height_vec, depth_vec,
		1169	x01, y01, z01,
		1170	row_stride_vec, img_stride_vec,
		1171	data_ptr, mipoffsets, neighbors[0][1]);
		1172
		1173	if (dims == 1) {
		1174	assert(!is_gather);
		1175	if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
		1176	/* Interpolate two samples from 1D image to produce one color */
		1177	for (chan = 0; chan < 4; chan++) {
		1178	colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
		1179	neighbors[0][0][chan],
		1180	neighbors[0][1][chan],
		1181	0);
		1182	}
		1183	}
		1184	else {
		1185	LLVMValueRef cmpval0, cmpval1;
		1186	cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
		1187	cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
		1188	/* simplified lerp, AND mask with weight and add */
		1189	colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
		1190	cmpval0, cmpval1);
		1191	colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
		1192	}
		1193	}
		1194	else {
		1195	/* 2D/3D texture */
		1196	struct lp_build_if_state corner_if;
		1197	LLVMValueRef colors0[4], colorss[4];
		1198
		1199	/* get x0/x1 texels at y1 */
		1200	lp_build_sample_texel_soa(bld,
		1201	width_vec, height_vec, depth_vec,
		1202	x10, y10, z10,
		1203	row_stride_vec, img_stride_vec,
		1204	data_ptr, mipoffsets, neighbors[1][0]);
		1205	lp_build_sample_texel_soa(bld,
		1206	width_vec, height_vec, depth_vec,
		1207	x11, y11, z11,
		1208	row_stride_vec, img_stride_vec,
		1209	data_ptr, mipoffsets, neighbors[1][1]);
		1210
		1211	/*
		1212	* To avoid having to duplicate linear_mask / fetch code use
		1213	* another branch (with corner condition though edge would work
		1214	* as well) here.
		1215	*/
		1216	if (accurate_cube_corners) {
		1217	LLVMValueRef w00, w01, w10, w11, wx0, wy0;
		1218	LLVMValueRef c_weight, c00, c01, c10, c11;
		1219	LLVMValueRef have_corner, one_third, tmp;
		1220
		1221	colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
		1222	colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
		1223	colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
		1224	colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
		1225
		1226	have_corner = LLVMBuildLoad(builder, have_corners, "");
		1227
		1228	lp_build_if(&corner_if, bld->gallivm, have_corner);
		1229
		1230	/*
		1231	* we can't use standard 2d lerp as we need per-element weight
		1232	* in case of corners, so just calculate bilinear result as
		1233	* w00s00 + w01s01 + w10s10 + w11s11.
		1234	* (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
		1235	* however calculating the weights needs another 6, so actually probably
		1236	* not slower than 2d lerp only for 4 channels as weights only need
		1237	* to be calculated once - of course fixing the weights has additional cost.)
		1238	*/
		1239	wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
		1240	wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
		1241	w00 = lp_build_mul(coord_bld, wx0, wy0);
		1242	w01 = lp_build_mul(coord_bld, s_fpart, wy0);
		1243	w10 = lp_build_mul(coord_bld, wx0, t_fpart);
		1244	w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
		1245
		1246	/* find corner weight */
		1247	c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
		1248	c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
		1249	c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
		1250	c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
		1251	c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
		1252	c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
		1253	c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
		1254	c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
		1255
		1256	/*
		1257	* add 1/3 of the corner weight to each of the 3 other samples
		1258	* and null out corner weight
		1259	*/
		1260	one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
		1261	c_weight = lp_build_mul(coord_bld, c_weight, one_third);
		1262	w00 = lp_build_add(coord_bld, w00, c_weight);
		1263	c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
		1264	w00 = lp_build_andnot(coord_bld, w00, c00);
		1265	w01 = lp_build_add(coord_bld, w01, c_weight);
		1266	c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
		1267	w01 = lp_build_andnot(coord_bld, w01, c01);
		1268	w10 = lp_build_add(coord_bld, w10, c_weight);
		1269	c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
		1270	w10 = lp_build_andnot(coord_bld, w10, c10);
		1271	w11 = lp_build_add(coord_bld, w11, c_weight);
		1272	c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
		1273	w11 = lp_build_andnot(coord_bld, w11, c11);
		1274
		1275	if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
		1276	for (chan = 0; chan < 4; chan++) {
		1277	colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
		1278	tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
		1279	colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
		1280	tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
		1281	colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
		1282	tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
		1283	colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
		1284	}
		1285	}
		1286	else {
		1287	LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
		1288	cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
		1289	cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
		1290	cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
		1291	cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
		1292	/* inputs to interpolation are just masks so just add masked weights together */
		1293	cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
		1294	cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
		1295	cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
		1296	cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
		1297	colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
		1298	tmp = lp_build_and(coord_bld, w01, cmpval01);
		1299	colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
		1300	tmp = lp_build_and(coord_bld, w10, cmpval10);
		1301	colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
		1302	tmp = lp_build_and(coord_bld, w11, cmpval11);
		1303	colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
		1304	colors0[1] = colors0[2] = colors0[3] = colors0[0];
		1305	}
		1306
		1307	LLVMBuildStore(builder, colors0[0], colorss[0]);
		1308	LLVMBuildStore(builder, colors0[1], colorss[1]);
		1309	LLVMBuildStore(builder, colors0[2], colorss[2]);
		1310	LLVMBuildStore(builder, colors0[3], colorss[3]);
		1311
		1312	lp_build_else(&corner_if);
		1313	}
		1314
		1315	if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
		1316	if (is_gather) {
		1317	/*
		1318	* Just assign the red channel (no component selection yet).
		1319	* This is a bit hackish, we usually do the swizzle at the
		1320	* end of sampling (much less values to swizzle), but this
		1321	* obviously cannot work when using gather.
		1322	*/
		1323	unsigned chan_swiz = bld->static_texture_state->swizzle_r;
		1324	colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
		1325	neighbors[1][0],
		1326	chan_swiz);
		1327	colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
		1328	neighbors[1][1],
		1329	chan_swiz);
		1330	colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
		1331	neighbors[0][1],
		1332	chan_swiz);
		1333	colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
		1334	neighbors[0][0],
		1335	chan_swiz);
		1336	}
		1337	else {
		1338	/* Bilinear interpolate the four samples from the 2D image / 3D slice */
		1339	for (chan = 0; chan < 4; chan++) {
		1340	colors0[chan] = lp_build_lerp_2d(texel_bld,
		1341	s_fpart, t_fpart,
		1342	neighbors[0][0][chan],
		1343	neighbors[0][1][chan],
		1344	neighbors[1][0][chan],
		1345	neighbors[1][1][chan],
		1346	0);
		1347	}
		1348	}
		1349	}
		1350	else {
		1351	LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
		1352	cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
		1353	cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
		1354	cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
		1355	cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
		1356
		1357	if (is_gather) {
		1358	/* more hacks for swizzling, should be X, ONE or ZERO... */
		1359	unsigned chan_swiz = bld->static_texture_state->swizzle_r;
		1360	if (chan_swiz <= PIPE_SWIZZLE_ALPHA) {
		1361	colors0[0] = lp_build_select(texel_bld, cmpval10,
		1362	texel_bld->one, texel_bld->zero);
		1363	colors0[1] = lp_build_select(texel_bld, cmpval11,
		1364	texel_bld->one, texel_bld->zero);
		1365	colors0[2] = lp_build_select(texel_bld, cmpval01,
		1366	texel_bld->one, texel_bld->zero);
		1367	colors0[3] = lp_build_select(texel_bld, cmpval00,
		1368	texel_bld->one, texel_bld->zero);
		1369	}
		1370	else if (chan_swiz == PIPE_SWIZZLE_ZERO) {
		1371	colors0[0] = colors0[1] = colors0[2] = colors0[3] =
		1372	texel_bld->zero;
		1373	}
		1374	else {
		1375	colors0[0] = colors0[1] = colors0[2] = colors0[3] =
		1376	texel_bld->one;
		1377	}
		1378	}
		1379	else {
		1380	colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
		1381	cmpval00, cmpval01, cmpval10, cmpval11);
		1382	colors0[1] = colors0[2] = colors0[3] = colors0[0];
		1383	}
		1384	}
		1385
		1386	if (accurate_cube_corners) {
		1387	LLVMBuildStore(builder, colors0[0], colorss[0]);
		1388	LLVMBuildStore(builder, colors0[1], colorss[1]);
		1389	LLVMBuildStore(builder, colors0[2], colorss[2]);
		1390	LLVMBuildStore(builder, colors0[3], colorss[3]);
		1391
		1392	lp_build_endif(&corner_if);
		1393
		1394	colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
		1395	colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
		1396	colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
		1397	colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
		1398	}
		1399
		1400	if (dims == 3) {
		1401	LLVMValueRef neighbors1[2][2][4];
		1402	LLVMValueRef colors1[4];
		1403
		1404	assert(!is_gather);
		1405
		1406	/* get x0/x1/y0/y1 texels at z1 */
		1407	lp_build_sample_texel_soa(bld,
		1408	width_vec, height_vec, depth_vec,
		1409	x00, y00, z1,
		1410	row_stride_vec, img_stride_vec,
		1411	data_ptr, mipoffsets, neighbors1[0][0]);
		1412	lp_build_sample_texel_soa(bld,
		1413	width_vec, height_vec, depth_vec,
		1414	x01, y01, z1,
		1415	row_stride_vec, img_stride_vec,
		1416	data_ptr, mipoffsets, neighbors1[0][1]);
		1417	lp_build_sample_texel_soa(bld,
		1418	width_vec, height_vec, depth_vec,
		1419	x10, y10, z1,
		1420	row_stride_vec, img_stride_vec,
		1421	data_ptr, mipoffsets, neighbors1[1][0]);
		1422	lp_build_sample_texel_soa(bld,
		1423	width_vec, height_vec, depth_vec,
		1424	x11, y11, z1,
		1425	row_stride_vec, img_stride_vec,
		1426	data_ptr, mipoffsets, neighbors1[1][1]);
		1427
		1428	if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
		1429	/* Bilinear interpolate the four samples from the second Z slice */
		1430	for (chan = 0; chan < 4; chan++) {
		1431	colors1[chan] = lp_build_lerp_2d(texel_bld,
		1432	s_fpart, t_fpart,
		1433	neighbors1[0][0][chan],
		1434	neighbors1[0][1][chan],
		1435	neighbors1[1][0][chan],
		1436	neighbors1[1][1][chan],
		1437	0);
		1438	}
		1439	/* Linearly interpolate the two samples from the two 3D slices */
		1440	for (chan = 0; chan < 4; chan++) {
		1441	colors_out[chan] = lp_build_lerp(texel_bld,
		1442	r_fpart,
		1443	colors0[chan], colors1[chan],
		1444	0);
		1445	}
		1446	}
		1447	else {
		1448	LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
		1449	cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
		1450	cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
		1451	cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
		1452	cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
		1453	colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
		1454	cmpval00, cmpval01, cmpval10, cmpval11);
		1455	/* Linearly interpolate the two samples from the two 3D slices */
		1456	colors_out[0] = lp_build_lerp(texel_bld,
		1457	r_fpart,
		1458	colors0[0], colors1[0],
		1459	0);
		1460	colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
		1461	}
		1462	}
		1463	else {
		1464	/* 2D tex */
		1465	for (chan = 0; chan < 4; chan++) {
		1466	colors_out[chan] = colors0[chan];
		1467	}
		1468	}
		1469	}
		1470	}
		1471
		1472
		1473	/**
		1474	* Sample the texture/mipmap using given image filter and mip filter.
		1475	* ilevel0 and ilevel1 indicate the two mipmap levels to sample
		1476	* from (vectors or scalars).
		1477	* If we're using nearest miplevel sampling the '1' values will be null/unused.
		1478	*/
		1479	static void
		1480	lp_build_sample_mipmap(struct lp_build_sample_context *bld,
		1481	unsigned img_filter,
		1482	unsigned mip_filter,
		1483	boolean is_gather,
		1484	LLVMValueRef *coords,
		1485	const LLVMValueRef *offsets,
		1486	LLVMValueRef ilevel0,
		1487	LLVMValueRef ilevel1,
		1488	LLVMValueRef lod_fpart,
		1489	LLVMValueRef *colors_out)
		1490	{
		1491	LLVMBuilderRef builder = bld->gallivm->builder;
		1492	LLVMValueRef size0 = NULL;
		1493	LLVMValueRef size1 = NULL;
		1494	LLVMValueRef row_stride0_vec = NULL;
		1495	LLVMValueRef row_stride1_vec = NULL;
		1496	LLVMValueRef img_stride0_vec = NULL;
		1497	LLVMValueRef img_stride1_vec = NULL;
		1498	LLVMValueRef data_ptr0 = NULL;
		1499	LLVMValueRef data_ptr1 = NULL;
		1500	LLVMValueRef mipoff0 = NULL;
		1501	LLVMValueRef mipoff1 = NULL;
		1502	LLVMValueRef colors0[4], colors1[4];
		1503	unsigned chan;
		1504
		1505	/* sample the first mipmap level */
		1506	lp_build_mipmap_level_sizes(bld, ilevel0,
		1507	&size0,
		1508	&row_stride0_vec, &img_stride0_vec);
		1509	if (bld->num_mips == 1) {
		1510	data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
		1511	}
		1512	else {
		1513	/* This path should work for num_lods 1 too but slightly less efficient */
		1514	data_ptr0 = bld->base_ptr;
		1515	mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
		1516	}
		1517	if (img_filter == PIPE_TEX_FILTER_NEAREST) {
		1518	lp_build_sample_image_nearest(bld, size0,
		1519	row_stride0_vec, img_stride0_vec,
		1520	data_ptr0, mipoff0, coords, offsets,
		1521	colors0);
		1522	}
		1523	else {
		1524	assert(img_filter == PIPE_TEX_FILTER_LINEAR);
		1525	lp_build_sample_image_linear(bld, is_gather, size0, NULL,
		1526	row_stride0_vec, img_stride0_vec,
		1527	data_ptr0, mipoff0, coords, offsets,
		1528	colors0);
		1529	}
		1530
		1531	/* Store the first level's colors in the output variables */
		1532	for (chan = 0; chan < 4; chan++) {
		1533	LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
		1534	}
		1535
		1536	if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
		1537	struct lp_build_if_state if_ctx;
		1538	LLVMValueRef need_lerp;
		1539
		1540	/* need_lerp = lod_fpart > 0 */
		1541	if (bld->num_lods == 1) {
		1542	need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
		1543	lod_fpart, bld->lodf_bld.zero,
		1544	"need_lerp");
		1545	}
		1546	else {
		1547	/*
		1548	* We'll do mip filtering if any of the quads (or individual
		1549	* pixel in case of per-pixel lod) need it.
		1550	* It might be better to split the vectors here and only fetch/filter
		1551	* quads which need it (if there's one lod per quad).
		1552	*/
		1553	need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
		1554	PIPE_FUNC_GREATER,
		1555	lod_fpart, bld->lodf_bld.zero);
		1556	need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
		1557	}
		1558
		1559	lp_build_if(&if_ctx, bld->gallivm, need_lerp);
		1560	{
		1561	/*
		1562	* We unfortunately need to clamp lod_fpart here since we can get
		1563	* negative values which would screw up filtering if not all
		1564	* lod_fpart values have same sign.
		1565	*/
		1566	lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
		1567	bld->lodf_bld.zero);
		1568	/* sample the second mipmap level */
		1569	lp_build_mipmap_level_sizes(bld, ilevel1,
		1570	&size1,
		1571	&row_stride1_vec, &img_stride1_vec);
		1572	if (bld->num_mips == 1) {
		1573	data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
		1574	}
		1575	else {
		1576	data_ptr1 = bld->base_ptr;
		1577	mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
		1578	}
		1579	if (img_filter == PIPE_TEX_FILTER_NEAREST) {
		1580	lp_build_sample_image_nearest(bld, size1,
		1581	row_stride1_vec, img_stride1_vec,
		1582	data_ptr1, mipoff1, coords, offsets,
		1583	colors1);
		1584	}
		1585	else {
		1586	lp_build_sample_image_linear(bld, FALSE, size1, NULL,
		1587	row_stride1_vec, img_stride1_vec,
		1588	data_ptr1, mipoff1, coords, offsets,
		1589	colors1);
		1590	}
		1591
		1592	/* interpolate samples from the two mipmap levels */
		1593
		1594	if (bld->num_lods != bld->coord_type.length)
		1595	lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
		1596	bld->lodf_bld.type,
		1597	bld->texel_bld.type,
		1598	lod_fpart);
		1599
		1600	for (chan = 0; chan < 4; chan++) {
		1601	colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
		1602	colors0[chan], colors1[chan],
		1603	0);
		1604	LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
		1605	}
		1606	}
		1607	lp_build_endif(&if_ctx);
		1608	}
		1609	}
		1610
		1611
		1612	/**
		1613	* Sample the texture/mipmap using given mip filter, and using
		1614	* both nearest and linear filtering at the same time depending
		1615	* on linear_mask.
		1616	* lod can be per quad but linear_mask is always per pixel.
		1617	* ilevel0 and ilevel1 indicate the two mipmap levels to sample
		1618	* from (vectors or scalars).
		1619	* If we're using nearest miplevel sampling the '1' values will be null/unused.
		1620	*/
		1621	static void
		1622	lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
		1623	LLVMValueRef linear_mask,
		1624	unsigned mip_filter,
		1625	LLVMValueRef *coords,
		1626	const LLVMValueRef *offsets,
		1627	LLVMValueRef ilevel0,
		1628	LLVMValueRef ilevel1,
		1629	LLVMValueRef lod_fpart,
		1630	LLVMValueRef lod_positive,
		1631	LLVMValueRef *colors_out)
		1632	{
		1633	LLVMBuilderRef builder = bld->gallivm->builder;
		1634	LLVMValueRef size0 = NULL;
		1635	LLVMValueRef size1 = NULL;
		1636	LLVMValueRef row_stride0_vec = NULL;
		1637	LLVMValueRef row_stride1_vec = NULL;
		1638	LLVMValueRef img_stride0_vec = NULL;
		1639	LLVMValueRef img_stride1_vec = NULL;
		1640	LLVMValueRef data_ptr0 = NULL;
		1641	LLVMValueRef data_ptr1 = NULL;
		1642	LLVMValueRef mipoff0 = NULL;
		1643	LLVMValueRef mipoff1 = NULL;
		1644	LLVMValueRef colors0[4], colors1[4];
		1645	unsigned chan;
		1646
		1647	/* sample the first mipmap level */
		1648	lp_build_mipmap_level_sizes(bld, ilevel0,
		1649	&size0,
		1650	&row_stride0_vec, &img_stride0_vec);
		1651	if (bld->num_mips == 1) {
		1652	data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
		1653	}
		1654	else {
		1655	/* This path should work for num_lods 1 too but slightly less efficient */
		1656	data_ptr0 = bld->base_ptr;
		1657	mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
		1658	}
		1659
		1660	lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
		1661	row_stride0_vec, img_stride0_vec,
		1662	data_ptr0, mipoff0, coords, offsets,
		1663	colors0);
		1664
		1665	/* Store the first level's colors in the output variables */
		1666	for (chan = 0; chan < 4; chan++) {
		1667	LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
		1668	}
		1669
		1670	if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
		1671	struct lp_build_if_state if_ctx;
		1672	LLVMValueRef need_lerp;
		1673
		1674	/*
		1675	* We'll do mip filtering if any of the quads (or individual
		1676	* pixel in case of per-pixel lod) need it.
		1677	* Note using lod_positive here not lod_fpart since it may be the same
		1678	* condition as that used in the outer "if" in the caller hence llvm
		1679	* should be able to merge the branches in this case.
		1680	*/
		1681	need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
		1682
		1683	lp_build_if(&if_ctx, bld->gallivm, need_lerp);
		1684	{
		1685	/*
		1686	* We unfortunately need to clamp lod_fpart here since we can get
		1687	* negative values which would screw up filtering if not all
		1688	* lod_fpart values have same sign.
		1689	*/
		1690	lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
		1691	bld->lodf_bld.zero);
		1692	/* sample the second mipmap level */
		1693	lp_build_mipmap_level_sizes(bld, ilevel1,
		1694	&size1,
		1695	&row_stride1_vec, &img_stride1_vec);
		1696	if (bld->num_mips == 1) {
		1697	data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
		1698	}
		1699	else {
		1700	data_ptr1 = bld->base_ptr;
		1701	mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
		1702	}
		1703
		1704	lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
		1705	row_stride1_vec, img_stride1_vec,
		1706	data_ptr1, mipoff1, coords, offsets,
		1707	colors1);
		1708
		1709	/* interpolate samples from the two mipmap levels */
		1710
		1711	if (bld->num_lods != bld->coord_type.length)
		1712	lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
		1713	bld->lodf_bld.type,
		1714	bld->texel_bld.type,
		1715	lod_fpart);
		1716
		1717	for (chan = 0; chan < 4; chan++) {
		1718	colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
		1719	colors0[chan], colors1[chan],
		1720	0);
		1721	LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
		1722	}
		1723	}
		1724	lp_build_endif(&if_ctx);
		1725	}
		1726	}
		1727
		1728
		1729	/**
		1730	* Build (per-coord) layer value.
		1731	* Either clamp layer to valid values or fill in optional out_of_bounds
		1732	* value and just return value unclamped.
		1733	*/
		1734	static LLVMValueRef
		1735	lp_build_layer_coord(struct lp_build_sample_context *bld,
		1736	unsigned texture_unit,
		1737	boolean is_cube_array,
		1738	LLVMValueRef layer,
		1739	LLVMValueRef *out_of_bounds)
		1740	{
		1741	LLVMValueRef num_layers;
		1742	struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
		1743
		1744	num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
		1745	bld->context_ptr, texture_unit);
		1746
		1747	if (out_of_bounds) {
		1748	LLVMValueRef out1, out;
		1749	assert(!is_cube_array);
		1750	num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
		1751	out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
		1752	out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
		1753	*out_of_bounds = lp_build_or(int_coord_bld, out, out1);
		1754	return layer;
		1755	}
		1756	else {
		1757	LLVMValueRef maxlayer;
		1758	LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
		1759	bld->int_bld.one;
		1760	maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
		1761	maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
		1762	return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
		1763	}
		1764	}
		1765
		1766
		1767	/**
		1768	* Calculate cube face, lod, mip levels.
		1769	*/
		1770	static void
		1771	lp_build_sample_common(struct lp_build_sample_context *bld,
		1772	unsigned texture_index,
		1773	unsigned sampler_index,
		1774	LLVMValueRef *coords,
		1775	const struct lp_derivatives derivs, / optional */
		1776	LLVMValueRef lod_bias, /* optional */
		1777	LLVMValueRef explicit_lod, /* optional */
		1778	LLVMValueRef *lod_pos_or_zero,
		1779	LLVMValueRef *lod_fpart,
		1780	LLVMValueRef *ilevel0,
		1781	LLVMValueRef *ilevel1)
		1782	{
		1783	const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
		1784	const unsigned min_filter = bld->static_sampler_state->min_img_filter;
		1785	const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
		1786	const unsigned target = bld->static_texture_state->target;
		1787	LLVMValueRef first_level, cube_rho = NULL;
		1788	LLVMValueRef lod_ipart = NULL;
		1789	struct lp_derivatives cube_derivs;
		1790
		1791	/*
		1792	printf("%s mip %d min %d mag %d\n", __FUNCTION__,
		1793	mip_filter, min_filter, mag_filter);
		1794	*/
		1795
		1796	/*
		1797	* Choose cube face, recompute texcoords for the chosen face and
		1798	* compute rho here too (as it requires transform of derivatives).
		1799	*/
		1800	if (target == PIPE_TEXTURE_CUBE \|\| target == PIPE_TEXTURE_CUBE_ARRAY) {
		1801	boolean need_derivs;
		1802	need_derivs = ((min_filter != mag_filter \|\|
		1803	mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
		1804	!bld->static_sampler_state->min_max_lod_equal &&
		1805	!explicit_lod);
		1806	lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
		1807	derivs = &cube_derivs;
		1808	if (target == PIPE_TEXTURE_CUBE_ARRAY) {
		1809	/* calculate cube layer coord now */
		1810	LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
		1811	LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
		1812	layer = lp_build_mul(&bld->int_coord_bld, layer, six);
		1813	coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
		1814	/* because of seamless filtering can't add it to face (coords[2]) here. */
		1815	}
		1816	}
		1817	else if (target == PIPE_TEXTURE_1D_ARRAY \|\|
		1818	target == PIPE_TEXTURE_2D_ARRAY) {
		1819	coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
		1820	coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
		1821	}
		1822
		1823	if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
		1824	/*
		1825	* Clamp p coords to [0,1] for fixed function depth texture format here.
		1826	* Technically this is not entirely correct for unorm depth as the ref value
		1827	* should be converted to the depth format (quantization!) and comparison
		1828	* then done in texture format. This would actually help performance (since
		1829	* only need to do it once and could save the per-sample conversion of texels
		1830	* to floats instead), but it would need more messy code (would need to push
		1831	* at least some bits down to actual fetch so conversion could be skipped,
		1832	* and would have ugly interaction with border color, would need to convert
		1833	* border color to that format too or do some other tricks to make it work).
		1834	*/
		1835	const struct util_format_description *format_desc = bld->format_desc;
		1836	unsigned chan_type;
		1837	/* not entirely sure we couldn't end up with non-valid swizzle here */
		1838	chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
		1839	format_desc->channel[format_desc->swizzle[0]].type :
		1840	UTIL_FORMAT_TYPE_FLOAT;
		1841	if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
		1842	coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
		1843	bld->coord_bld.zero, bld->coord_bld.one);
		1844	}
		1845	}
		1846
		1847	/*
		1848	* Compute the level of detail (float).
		1849	*/
		1850	if (min_filter != mag_filter \|\|
		1851	mip_filter != PIPE_TEX_MIPFILTER_NONE) {
		1852	/* Need to compute lod either to choose mipmap levels or to
		1853	* distinguish between minification/magnification with one mipmap level.
		1854	*/
		1855	lp_build_lod_selector(bld, texture_index, sampler_index,
		1856	coords[0], coords[1], coords[2], cube_rho,
		1857	derivs, lod_bias, explicit_lod,
		1858	mip_filter,
		1859	&lod_ipart, lod_fpart, lod_pos_or_zero);
		1860	} else {
		1861	lod_ipart = bld->lodi_bld.zero;
		1862	*lod_pos_or_zero = bld->lodi_bld.zero;
		1863	}
		1864
		1865	if (bld->num_lods != bld->num_mips) {
		1866	/* only makes sense if there's just a single mip level */
		1867	assert(bld->num_mips == 1);
		1868	lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
		1869	}
		1870
		1871	/*
		1872	* Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
		1873	*/
		1874	switch (mip_filter) {
		1875	default:
		1876	assert(0 && "bad mip_filter value in lp_build_sample_soa()");
		1877	/* fall-through */
		1878	case PIPE_TEX_MIPFILTER_NONE:
		1879	/* always use mip level 0 */
		1880	first_level = bld->dynamic_state->first_level(bld->dynamic_state,
		1881	bld->gallivm, bld->context_ptr,
		1882	texture_index);
		1883	first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
		1884	*ilevel0 = first_level;
		1885	break;
		1886	case PIPE_TEX_MIPFILTER_NEAREST:
		1887	assert(lod_ipart);
		1888	lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
		1889	break;
		1890	case PIPE_TEX_MIPFILTER_LINEAR:
		1891	assert(lod_ipart);
		1892	assert(*lod_fpart);
		1893	lp_build_linear_mip_levels(bld, texture_index,
		1894	lod_ipart, lod_fpart,
		1895	ilevel0, ilevel1);
		1896	break;
		1897	}
		1898	}
		1899
		1900	static void
		1901	lp_build_clamp_border_color(struct lp_build_sample_context *bld,
		1902	unsigned sampler_unit)
		1903	{
		1904	struct gallivm_state *gallivm = bld->gallivm;
		1905	LLVMBuilderRef builder = gallivm->builder;
		1906	LLVMValueRef border_color_ptr =
		1907	bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
		1908	bld->context_ptr, sampler_unit);
		1909	LLVMValueRef border_color;
		1910	const struct util_format_description *format_desc = bld->format_desc;
		1911	struct lp_type vec4_type = bld->texel_type;
		1912	struct lp_build_context vec4_bld;
		1913	LLVMValueRef min_clamp = NULL;
		1914	LLVMValueRef max_clamp = NULL;
		1915
		1916	/*
		1917	* For normalized format need to clamp border color (technically
		1918	* probably should also quantize the data). Really sucks doing this
		1919	* here but can't avoid at least for now since this is part of
		1920	* sampler state and texture format is part of sampler_view state.
		1921	* GL expects also expects clamping for uint/sint formats too so
		1922	* do that as well (d3d10 can't end up here with uint/sint since it
		1923	* only supports them with ld).
		1924	*/
		1925	vec4_type.length = 4;
		1926	lp_build_context_init(&vec4_bld, gallivm, vec4_type);
		1927
		1928	/*
		1929	* Vectorized clamping of border color. Loading is a bit of a hack since
		1930	* we just cast the pointer to float array to pointer to vec4
		1931	* (int or float).
		1932	*/
		1933	border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
		1934	lp_build_const_int32(gallivm, 0));
		1935	border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
		1936	LLVMPointerType(vec4_bld.vec_type, 0), "");
		1937	border_color = LLVMBuildLoad(builder, border_color_ptr, "");
		1938	/* we don't have aligned type in the dynamic state unfortunately */
		1939	lp_set_load_alignment(border_color, 4);
		1940
		1941	/*
		1942	* Instead of having some incredibly complex logic which will try to figure out
		1943	* clamping necessary for each channel, simply use the first channel, and treat
		1944	* mixed signed/unsigned normalized formats specially.
		1945	* (Mixed non-normalized, which wouldn't work at all here, do not exist for a
		1946	* good reason.)
		1947	*/
		1948	if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
		1949	int chan;
		1950	/* d/s needs special handling because both present means just sampling depth */
		1951	if (util_format_is_depth_and_stencil(format_desc->format)) {
		1952	chan = format_desc->swizzle[0];
		1953	}
		1954	else {
		1955	chan = util_format_get_first_non_void_channel(format_desc->format);
		1956	}
		1957	if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
		1958	unsigned chan_type = format_desc->channel[chan].type;
		1959	unsigned chan_norm = format_desc->channel[chan].normalized;
		1960	unsigned chan_pure = format_desc->channel[chan].pure_integer;
		1961	if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
		1962	if (chan_norm) {
		1963	min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
		1964	max_clamp = vec4_bld.one;
		1965	}
		1966	else if (chan_pure) {
		1967	/*
		1968	* Border color was stored as int, hence need min/max clamp
		1969	* only if chan has less than 32 bits..
		1970	*/
		1971	unsigned chan_size = format_desc->channel[chan].size;
		1972	if (chan_size < 32) {
		1973	min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
		1974
		1975	max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
		1976	(1 << (chan_size - 1)) - 1);
		1977	}
		1978	}
		1979	/* TODO: no idea about non-pure, non-normalized! */
		1980	}
		1981	else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
		1982	if (chan_norm) {
		1983	min_clamp = vec4_bld.zero;
		1984	max_clamp = vec4_bld.one;
		1985	}
		1986	/*
		1987	* Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
		1988	* we use Z32_FLOAT_S8X24 to imply sampling depth component
		1989	* and ignoring stencil, which will blow up here if we try to
		1990	* do a uint clamp in a float texel build...
		1991	* And even if we had that format, mesa st also thinks using z24s8
		1992	* means depth sampling ignoring stencil.
		1993	*/
		1994	else if (chan_pure) {
		1995	/*
		1996	* Border color was stored as uint, hence never need min
		1997	* clamp, and only need max clamp if chan has less than 32 bits.
		1998	*/
		1999	unsigned chan_size = format_desc->channel[chan].size;
		2000	if (chan_size < 32) {
		2001	max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
		2002	(1 << chan_size) - 1);
		2003	}
		2004	/* TODO: no idea about non-pure, non-normalized! */
		2005	}
		2006	}
		2007	else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
		2008	/* TODO: I have no idea what clamp this would need if any! */
		2009	}
		2010	}
		2011	/* mixed plain formats (or different pure size) */
		2012	switch (format_desc->format) {
		2013	case PIPE_FORMAT_B10G10R10A2_UINT:
		2014	case PIPE_FORMAT_R10G10B10A2_UINT:
		2015	{
		2016	unsigned max10 = (1 << 10) - 1;
		2017	max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
		2018	max10, (1 << 2) - 1, NULL);
		2019	}
		2020	break;
		2021	case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
		2022	min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
		2023	-1.0F, 0.0F, NULL);
		2024	max_clamp = vec4_bld.one;
		2025	break;
		2026	case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
		2027	case PIPE_FORMAT_R5SG5SB6U_NORM:
		2028	min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
		2029	0.0F, 0.0F, NULL);
		2030	max_clamp = vec4_bld.one;
		2031	break;
		2032	default:
		2033	break;
		2034	}
		2035	}
		2036	else {
		2037	/* cannot figure this out from format description */
		2038	if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
		2039	/* s3tc formats are always unorm */
		2040	min_clamp = vec4_bld.zero;
		2041	max_clamp = vec4_bld.one;
		2042	}
		2043	else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC \|\|
		2044	format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
		2045	switch (format_desc->format) {
		2046	case PIPE_FORMAT_RGTC1_UNORM:
		2047	case PIPE_FORMAT_RGTC2_UNORM:
		2048	case PIPE_FORMAT_LATC1_UNORM:
		2049	case PIPE_FORMAT_LATC2_UNORM:
		2050	case PIPE_FORMAT_ETC1_RGB8:
		2051	min_clamp = vec4_bld.zero;
		2052	max_clamp = vec4_bld.one;
		2053	break;
		2054	case PIPE_FORMAT_RGTC1_SNORM:
		2055	case PIPE_FORMAT_RGTC2_SNORM:
		2056	case PIPE_FORMAT_LATC1_SNORM:
		2057	case PIPE_FORMAT_LATC2_SNORM:
		2058	min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
		2059	max_clamp = vec4_bld.one;
		2060	break;
		2061	default:
		2062	assert(0);
		2063	break;
		2064	}
		2065	}
		2066	/*
		2067	* all others from subsampled/other group, though we don't care
		2068	* about yuv (and should not have any from zs here)
		2069	*/
		2070	else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
		2071	switch (format_desc->format) {
		2072	case PIPE_FORMAT_R8G8_B8G8_UNORM:
		2073	case PIPE_FORMAT_G8R8_G8B8_UNORM:
		2074	case PIPE_FORMAT_G8R8_B8R8_UNORM:
		2075	case PIPE_FORMAT_R8G8_R8B8_UNORM:
		2076	case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
		2077	min_clamp = vec4_bld.zero;
		2078	max_clamp = vec4_bld.one;
		2079	break;
		2080	case PIPE_FORMAT_R8G8Bx_SNORM:
		2081	min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
		2082	max_clamp = vec4_bld.one;
		2083	break;
		2084	/*
		2085	* Note smallfloat formats usually don't need clamping
		2086	* (they still have infinite range) however this is not
		2087	* true for r11g11b10 and r9g9b9e5, which can't represent
		2088	* negative numbers (and additionally r9g9b9e5 can't represent
		2089	* very large numbers). d3d10 seems happy without clamping in
		2090	* this case, but gl spec is pretty clear: "for floating
		2091	* point and integer formats, border values are clamped to
		2092	* the representable range of the format" so do that here.
		2093	*/
		2094	case PIPE_FORMAT_R11G11B10_FLOAT:
		2095	min_clamp = vec4_bld.zero;
		2096	break;
		2097	case PIPE_FORMAT_R9G9B9E5_FLOAT:
		2098	min_clamp = vec4_bld.zero;
		2099	max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
		2100	break;
		2101	default:
		2102	assert(0);
		2103	break;
		2104	}
		2105	}
		2106	}
		2107
		2108	if (min_clamp) {
		2109	border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
		2110	}
		2111	if (max_clamp) {
		2112	border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
		2113	}
		2114
		2115	bld->border_color_clamped = border_color;
		2116	}
		2117
		2118
		2119	/**
		2120	* General texture sampling codegen.
		2121	* This function handles texture sampling for all texture targets (1D,
		2122	* 2D, 3D, cube) and all filtering modes.
		2123	*/
		2124	static void
		2125	lp_build_sample_general(struct lp_build_sample_context *bld,
		2126	unsigned sampler_unit,
		2127	boolean is_gather,
		2128	LLVMValueRef *coords,
		2129	const LLVMValueRef *offsets,
		2130	LLVMValueRef lod_positive,
		2131	LLVMValueRef lod_fpart,
		2132	LLVMValueRef ilevel0,
		2133	LLVMValueRef ilevel1,
		2134	LLVMValueRef *colors_out)
		2135	{
		2136	LLVMBuilderRef builder = bld->gallivm->builder;
		2137	const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
		2138	const unsigned mip_filter = sampler_state->min_mip_filter;
		2139	const unsigned min_filter = sampler_state->min_img_filter;
		2140	const unsigned mag_filter = sampler_state->mag_img_filter;
		2141	LLVMValueRef texels[4];
		2142	unsigned chan;
		2143
		2144	/* if we need border color, (potentially) clamp it now */
		2145	if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
		2146	min_filter,
		2147	mag_filter) \|\|
		2148	(bld->dims > 1 &&
		2149	lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
		2150	min_filter,
		2151	mag_filter)) \|\|
		2152	(bld->dims > 2 &&
		2153	lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
		2154	min_filter,
		2155	mag_filter))) {
		2156	lp_build_clamp_border_color(bld, sampler_unit);
		2157	}
		2158
		2159
		2160	/*
		2161	* Get/interpolate texture colors.
		2162	*/
		2163
		2164	for (chan = 0; chan < 4; ++chan) {
		2165	texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
		2166	lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
		2167	}
		2168
		2169	if (min_filter == mag_filter) {
		2170	/* no need to distinguish between minification and magnification */
		2171	lp_build_sample_mipmap(bld, min_filter, mip_filter,
		2172	is_gather,
		2173	coords, offsets,
		2174	ilevel0, ilevel1, lod_fpart,
		2175	texels);
		2176	}
		2177	else {
		2178	/*
		2179	* Could also get rid of the if-logic and always use mipmap_both, both
		2180	* for the single lod and multi-lod case if nothing really uses this.
		2181	*/
		2182	if (bld->num_lods == 1) {
		2183	/* Emit conditional to choose min image filter or mag image filter
		2184	* depending on the lod being > 0 or <= 0, respectively.
		2185	*/
		2186	struct lp_build_if_state if_ctx;
		2187
		2188	lod_positive = LLVMBuildTrunc(builder, lod_positive,
		2189	LLVMInt1TypeInContext(bld->gallivm->context), "");
		2190
		2191	lp_build_if(&if_ctx, bld->gallivm, lod_positive);
		2192	{
		2193	/* Use the minification filter */
		2194	lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
		2195	coords, offsets,
		2196	ilevel0, ilevel1, lod_fpart,
		2197	texels);
		2198	}
		2199	lp_build_else(&if_ctx);
		2200	{
		2201	/* Use the magnification filter */
		2202	lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
		2203	FALSE,
		2204	coords, offsets,
		2205	ilevel0, NULL, NULL,
		2206	texels);
		2207	}
		2208	lp_build_endif(&if_ctx);
		2209	}
		2210	else {
		2211	LLVMValueRef need_linear, linear_mask;
		2212	unsigned mip_filter_for_nearest;
		2213	struct lp_build_if_state if_ctx;
		2214
		2215	if (min_filter == PIPE_TEX_FILTER_LINEAR) {
		2216	linear_mask = lod_positive;
		2217	mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
		2218	}
		2219	else {
		2220	linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
		2221	mip_filter_for_nearest = mip_filter;
		2222	}
		2223	need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
		2224	linear_mask);
		2225
		2226	if (bld->num_lods != bld->coord_type.length) {
		2227	linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
		2228	bld->lodi_type,
		2229	bld->int_coord_type,
		2230	linear_mask);
		2231	}
		2232
		2233	lp_build_if(&if_ctx, bld->gallivm, need_linear);
		2234	{
		2235	/*
		2236	* Do sampling with both filters simultaneously. This means using
		2237	* a linear filter and doing some tricks (with weights) for the pixels
		2238	* which need nearest filter.
		2239	* Note that it's probably rare some pixels need nearest and some
		2240	* linear filter but the fixups required for the nearest pixels
		2241	* aren't all that complicated so just always run a combined path
		2242	* if at least some pixels require linear.
		2243	*/
		2244	lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
		2245	coords, offsets,
		2246	ilevel0, ilevel1,
		2247	lod_fpart, lod_positive,
		2248	texels);
		2249	}
		2250	lp_build_else(&if_ctx);
		2251	{
		2252	/*
		2253	* All pixels require just nearest filtering, which is way
		2254	* cheaper than linear, hence do a separate path for that.
		2255	*/
		2256	lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST, FALSE,
		2257	mip_filter_for_nearest,
		2258	coords, offsets,
		2259	ilevel0, ilevel1, lod_fpart,
		2260	texels);
		2261	}
		2262	lp_build_endif(&if_ctx);
		2263	}
		2264	}
		2265
		2266	for (chan = 0; chan < 4; ++chan) {
		2267	colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
		2268	lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
		2269	}
		2270	}
		2271
		2272
		2273	/**
		2274	* Texel fetch function.
		2275	* In contrast to general sampling there is no filtering, no coord minification,
		2276	* lod (if any) is always explicit uint, coords are uints (in terms of texel units)
		2277	* directly to be applied to the selected mip level (after adding texel offsets).
		2278	* This function handles texel fetch for all targets where texel fetch is supported
		2279	* (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
		2280	*/
		2281	static void
		2282	lp_build_fetch_texel(struct lp_build_sample_context *bld,
		2283	unsigned texture_unit,
		2284	const LLVMValueRef *coords,
		2285	LLVMValueRef explicit_lod,
		2286	const LLVMValueRef *offsets,
		2287	LLVMValueRef *colors_out)
		2288	{
		2289	struct lp_build_context *perquadi_bld = &bld->lodi_bld;
		2290	struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
		2291	unsigned dims = bld->dims, chan;
		2292	unsigned target = bld->static_texture_state->target;
		2293	boolean out_of_bound_ret_zero = TRUE;
		2294	LLVMValueRef size, ilevel;
		2295	LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
		2296	LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
		2297	LLVMValueRef width, height, depth, i, j;
		2298	LLVMValueRef offset, out_of_bounds, out1;
		2299
		2300	out_of_bounds = int_coord_bld->zero;
		2301
		2302	if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
		2303	if (bld->num_mips != int_coord_bld->type.length) {
		2304	ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
		2305	perquadi_bld->type, explicit_lod, 0);
		2306	}
		2307	else {
		2308	ilevel = explicit_lod;
		2309	}
		2310	lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
		2311	out_of_bound_ret_zero ? &out_of_bounds : NULL);
		2312	}
		2313	else {
		2314	assert(bld->num_mips == 1);
		2315	if (bld->static_texture_state->target != PIPE_BUFFER) {
		2316	ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
		2317	bld->context_ptr, texture_unit);
		2318	}
		2319	else {
		2320	ilevel = lp_build_const_int32(bld->gallivm, 0);
		2321	}
		2322	}
		2323	lp_build_mipmap_level_sizes(bld, ilevel,
		2324	&size,
		2325	&row_stride_vec, &img_stride_vec);
		2326	lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
		2327	size, &width, &height, &depth);
		2328
		2329	if (target == PIPE_TEXTURE_1D_ARRAY \|\|
		2330	target == PIPE_TEXTURE_2D_ARRAY) {
		2331	if (out_of_bound_ret_zero) {
		2332	z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
		2333	out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
		2334	}
		2335	else {
		2336	z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
		2337	}
		2338	}
		2339
		2340	/* This is a lot like border sampling */
		2341	if (offsets[0]) {
		2342	/*
		2343	* coords are really unsigned, offsets are signed, but I don't think
		2344	* exceeding 31 bits is possible
		2345	*/
		2346	x = lp_build_add(int_coord_bld, x, offsets[0]);
		2347	}
		2348	out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
		2349	out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
		2350	out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
		2351	out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
		2352
		2353	if (dims >= 2) {
		2354	if (offsets[1]) {
		2355	y = lp_build_add(int_coord_bld, y, offsets[1]);
		2356	}
		2357	out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
		2358	out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
		2359	out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
		2360	out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
		2361
		2362	if (dims >= 3) {
		2363	if (offsets[2]) {
		2364	z = lp_build_add(int_coord_bld, z, offsets[2]);
		2365	}
		2366	out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
		2367	out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
		2368	out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
		2369	out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
		2370	}
		2371	}
		2372
		2373	lp_build_sample_offset(int_coord_bld,
		2374	bld->format_desc,
		2375	x, y, z, row_stride_vec, img_stride_vec,
		2376	&offset, &i, &j);
		2377
		2378	if (bld->static_texture_state->target != PIPE_BUFFER) {
		2379	offset = lp_build_add(int_coord_bld, offset,
		2380	lp_build_get_mip_offsets(bld, ilevel));
		2381	}
		2382
		2383	offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
		2384
		2385	lp_build_fetch_rgba_soa(bld->gallivm,
		2386	bld->format_desc,
		2387	bld->texel_type,
		2388	bld->base_ptr, offset,
		2389	i, j,
		2390	colors_out);
		2391
		2392	if (out_of_bound_ret_zero) {
		2393	/*
		2394	* Only needed for ARB_robust_buffer_access_behavior and d3d10.
		2395	* Could use min/max above instead of out-of-bounds comparisons
		2396	* if we don't care about the result returned for out-of-bounds.
		2397	*/
		2398	for (chan = 0; chan < 4; chan++) {
		2399	colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
		2400	bld->texel_bld.zero, colors_out[chan]);
		2401	}
		2402	}
		2403	}
		2404
		2405
		2406	/**
		2407	* Just set texels to white instead of actually sampling the texture.
		2408	* For debugging.
		2409	*/
		2410	void
		2411	lp_build_sample_nop(struct gallivm_state *gallivm,
		2412	struct lp_type type,
		2413	const LLVMValueRef *coords,
		2414	LLVMValueRef texel_out[4])
		2415	{
		2416	LLVMValueRef one = lp_build_one(gallivm, type);
		2417	unsigned chan;
		2418
		2419	for (chan = 0; chan < 4; chan++) {
		2420	texel_out[chan] = one;
		2421	}
		2422	}
		2423
		2424
		2425	/**
		2426	* Build the actual texture sampling code.
		2427	* 'texel' will return a vector of four LLVMValueRefs corresponding to
		2428	* R, G, B, A.
		2429	* \param type vector float type to use for coords, etc.
		2430	* \param sample_key
		2431	* \param derivs partial derivatives of (s,t,r,q) with respect to x and y
		2432	*/
		2433	static void
		2434	lp_build_sample_soa_code(struct gallivm_state *gallivm,
		2435	const struct lp_static_texture_state *static_texture_state,
		2436	const struct lp_static_sampler_state *static_sampler_state,
		2437	struct lp_sampler_dynamic_state *dynamic_state,
		2438	struct lp_type type,
		2439	unsigned sample_key,
		2440	unsigned texture_index,
		2441	unsigned sampler_index,
		2442	LLVMValueRef context_ptr,
		2443	const LLVMValueRef *coords,
		2444	const LLVMValueRef *offsets,
		2445	const struct lp_derivatives derivs, / optional */
		2446	LLVMValueRef lod, /* optional */
		2447	LLVMValueRef texel_out[4])
		2448	{
		2449	unsigned target = static_texture_state->target;
		2450	unsigned dims = texture_dims(target);
		2451	unsigned num_quads = type.length / 4;
		2452	unsigned mip_filter, min_img_filter, mag_img_filter, i;
		2453	struct lp_build_sample_context bld;
		2454	struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
		2455	LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
		2456	LLVMBuilderRef builder = gallivm->builder;
		2457	LLVMValueRef tex_width, newcoords[5];
		2458	enum lp_sampler_lod_property lod_property;
		2459	enum lp_sampler_lod_control lod_control;
		2460	enum lp_sampler_op_type op_type;
		2461	LLVMValueRef lod_bias = NULL;
		2462	LLVMValueRef explicit_lod = NULL;
		2463	boolean op_is_tex;
		2464
		2465	if (0) {
		2466	enum pipe_format fmt = static_texture_state->format;
		2467	debug_printf("Sample from %s\n", util_format_name(fmt));
		2468	}
		2469
		2470	lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
		2471	LP_SAMPLER_LOD_PROPERTY_SHIFT;
		2472	lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
		2473	LP_SAMPLER_LOD_CONTROL_SHIFT;
		2474	op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
		2475	LP_SAMPLER_OP_TYPE_SHIFT;
		2476
		2477	op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
		2478
		2479	if (lod_control == LP_SAMPLER_LOD_BIAS) {
		2480	lod_bias = lod;
		2481	assert(lod);
		2482	assert(derivs == NULL);
		2483	}
		2484	else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
		2485	explicit_lod = lod;
		2486	assert(lod);
		2487	assert(derivs == NULL);
		2488	}
		2489	else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
		2490	assert(derivs);
		2491	assert(lod == NULL);
		2492	}
		2493	else {
		2494	assert(derivs == NULL);
		2495	assert(lod == NULL);
		2496	}
		2497
		2498	if (static_texture_state->format == PIPE_FORMAT_NONE) {
		2499	/*
		2500	* If there's nothing bound, format is NONE, and we must return
		2501	* all zero as mandated by d3d10 in this case.
		2502	*/
		2503	unsigned chan;
		2504	LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
		2505	for (chan = 0; chan < 4; chan++) {
		2506	texel_out[chan] = zero;
		2507	}
		2508	return;
		2509	}
		2510
		2511	assert(type.floating);
		2512
		2513	/* Setup our build context */
		2514	memset(&bld, 0, sizeof bld);
		2515	bld.gallivm = gallivm;
		2516	bld.context_ptr = context_ptr;
		2517	bld.static_sampler_state = &derived_sampler_state;
		2518	bld.static_texture_state = static_texture_state;
		2519	bld.dynamic_state = dynamic_state;
		2520	bld.format_desc = util_format_description(static_texture_state->format);
		2521	bld.dims = dims;
		2522
		2523	bld.vector_width = lp_type_width(type);
		2524
		2525	bld.float_type = lp_type_float(32);
		2526	bld.int_type = lp_type_int(32);
		2527	bld.coord_type = type;
		2528	bld.int_coord_type = lp_int_type(type);
		2529	bld.float_size_in_type = lp_type_float(32);
		2530	bld.float_size_in_type.length = dims > 1 ? 4 : 1;
		2531	bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
		2532	bld.texel_type = type;
		2533
		2534	/* always using the first channel hopefully should be safe,
		2535	* if not things WILL break in other places anyway.
		2536	*/
		2537	if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
		2538	bld.format_desc->channel[0].pure_integer) {
		2539	if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
		2540	bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
		2541	}
		2542	else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
		2543	bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
		2544	}
		2545	}
		2546	else if (util_format_has_stencil(bld.format_desc) &&
		2547	!util_format_has_depth(bld.format_desc)) {
		2548	/* for stencil only formats, sample stencil (uint) */
		2549	bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
		2550	}
		2551
		2552	if (!static_texture_state->level_zero_only) {
		2553	derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
		2554	} else {
		2555	derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
		2556	}
		2557	if (op_type == LP_SAMPLER_OP_GATHER) {
		2558	/*
		2559	* gather4 is exactly like GL_LINEAR filtering but in the end skipping
		2560	* the actual filtering. Using mostly the same paths, so cube face
		2561	* selection, coord wrapping etc. all naturally uses the same code.
		2562	*/
		2563	derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
		2564	derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
		2565	derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
		2566	}
		2567	mip_filter = derived_sampler_state.min_mip_filter;
		2568
		2569	if (0) {
		2570	debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
		2571	}
		2572
		2573	if (static_texture_state->target == PIPE_TEXTURE_CUBE \|\|
		2574	static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
		2575	{
		2576	/*
		2577	* Seamless filtering ignores wrap modes.
		2578	* Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
		2579	* bilinear it's not correct but way better than using for instance repeat.
		2580	* Note we even set this for non-seamless. Technically GL allows any wrap
		2581	* mode, which made sense when supporting true borders (can get seamless
		2582	* effect with border and CLAMP_TO_BORDER), but gallium doesn't support
		2583	* borders and d3d9 requires wrap modes to be ignored and it's a pain to fix
		2584	* up the sampler state (as it makes it texture dependent).
		2585	*/
		2586	derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		2587	derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		2588	}
		2589
		2590	min_img_filter = derived_sampler_state.min_img_filter;
		2591	mag_img_filter = derived_sampler_state.mag_img_filter;
		2592
		2593
		2594	/*
		2595	* This is all a bit complicated different paths are chosen for performance
		2596	* reasons.
		2597	* Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
		2598	* everything (the last two options are equivalent for 4-wide case).
		2599	* If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
		2600	* lod is calculated then the lod value extracted afterwards so making this
		2601	* case basically the same as far as lod handling is concerned for the
		2602	* further sample/filter code as the 1 lod for everything case.
		2603	* Different lod handling mostly shows up when building mipmap sizes
		2604	* (lp_build_mipmap_level_sizes() and friends) and also in filtering
		2605	* (getting the fractional part of the lod to the right texels).
		2606	*/
		2607
		2608	/*
		2609	* There are other situations where at least the multiple int lods could be
		2610	* avoided like min and max lod being equal.
		2611	*/
		2612	bld.num_mips = bld.num_lods = 1;
		2613
		2614	if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
		2615	(gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
		2616	(static_texture_state->target == PIPE_TEXTURE_CUBE \|\|
		2617	static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
		2618	(op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
		2619	/*
		2620	* special case for using per-pixel lod even for implicit lod,
		2621	* which is generally never required (ok by APIs) except to please
		2622	* some (somewhat broken imho) tests (because per-pixel face selection
		2623	* can cause derivatives to be different for pixels outside the primitive
		2624	* due to the major axis division even if pre-project derivatives are
		2625	* looking normal).
		2626	*/
		2627	bld.num_mips = type.length;
		2628	bld.num_lods = type.length;
		2629	}
		2630	else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT \|\|
		2631	(explicit_lod \|\| lod_bias \|\| derivs)) {
		2632	if ((!op_is_tex && target != PIPE_BUFFER) \|\|
		2633	(op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
		2634	bld.num_mips = type.length;
		2635	bld.num_lods = type.length;
		2636	}
		2637	else if (op_is_tex && min_img_filter != mag_img_filter) {
		2638	bld.num_mips = 1;
		2639	bld.num_lods = type.length;
		2640	}
		2641	}
		2642	/* TODO: for true scalar_lod should only use 1 lod value */
		2643	else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) \|\|
		2644	(op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
		2645	bld.num_mips = num_quads;
		2646	bld.num_lods = num_quads;
		2647	}
		2648	else if (op_is_tex && min_img_filter != mag_img_filter) {
		2649	bld.num_mips = 1;
		2650	bld.num_lods = num_quads;
		2651	}
		2652
		2653
		2654	bld.lodf_type = type;
		2655	/* we want native vector size to be able to use our intrinsics */
		2656	if (bld.num_lods != type.length) {
		2657	/* TODO: this currently always has to be per-quad or per-element */
		2658	bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
		2659	}
		2660	bld.lodi_type = lp_int_type(bld.lodf_type);
		2661	bld.levelf_type = bld.lodf_type;
		2662	if (bld.num_mips == 1) {
		2663	bld.levelf_type.length = 1;
		2664	}
		2665	bld.leveli_type = lp_int_type(bld.levelf_type);
		2666	bld.float_size_type = bld.float_size_in_type;
		2667	/* Note: size vectors may not be native. They contain minified w/h/d/_ values,
		2668	* with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
		2669	if (bld.num_mips > 1) {
		2670	bld.float_size_type.length = bld.num_mips == type.length ?
		2671	bld.num_mips * bld.float_size_in_type.length :
		2672	type.length;
		2673	}
		2674	bld.int_size_type = lp_int_type(bld.float_size_type);
		2675
		2676	lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
		2677	lp_build_context_init(&bld.float_vec_bld, gallivm, type);
		2678	lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
		2679	lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
		2680	lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
		2681	lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
		2682	lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
		2683	lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
		2684	lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
		2685	lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
		2686	lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
		2687	lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
		2688	lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
		2689	lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
		2690
		2691	/* Get the dynamic state */
		2692	tex_width = dynamic_state->width(dynamic_state, gallivm,
		2693	context_ptr, texture_index);
		2694	bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
		2695	context_ptr, texture_index);
		2696	bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
		2697	context_ptr, texture_index);
		2698	bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
		2699	context_ptr, texture_index);
		2700	bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
		2701	context_ptr, texture_index);
		2702	/* Note that mip_offsets is an array[level] of offsets to texture images */
		2703
		2704	/* width, height, depth as single int vector */
		2705	if (dims <= 1) {
		2706	bld.int_size = tex_width;
		2707	}
		2708	else {
		2709	bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
		2710	tex_width,
		2711	LLVMConstInt(i32t, 0, 0), "");
		2712	if (dims >= 2) {
		2713	LLVMValueRef tex_height =
		2714	dynamic_state->height(dynamic_state, gallivm,
		2715	context_ptr, texture_index);
		2716	bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
		2717	tex_height,
		2718	LLVMConstInt(i32t, 1, 0), "");
		2719	if (dims >= 3) {
		2720	LLVMValueRef tex_depth =
		2721	dynamic_state->depth(dynamic_state, gallivm, context_ptr,
		2722	texture_index);
		2723	bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
		2724	tex_depth,
		2725	LLVMConstInt(i32t, 2, 0), "");
		2726	}
		2727	}
		2728	}
		2729
		2730	for (i = 0; i < 5; i++) {
		2731	newcoords[i] = coords[i];
		2732	}
		2733
		2734	if (0) {
		2735	/* For debug: no-op texture sampling */
		2736	lp_build_sample_nop(gallivm,
		2737	bld.texel_type,
		2738	newcoords,
		2739	texel_out);
		2740	}
		2741
		2742	else if (op_type == LP_SAMPLER_OP_FETCH) {
		2743	lp_build_fetch_texel(&bld, texture_index, newcoords,
		2744	lod, offsets,
		2745	texel_out);
		2746	}
		2747
		2748	else {
		2749	LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
		2750	LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
		2751	boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
		2752	op_is_tex &&
		2753	/* not sure this is strictly needed or simply impossible */
		2754	derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
		2755	lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
		2756
		2757	use_aos &= bld.num_lods <= num_quads \|\|
		2758	derived_sampler_state.min_img_filter ==
		2759	derived_sampler_state.mag_img_filter;
		2760	if (dims > 1) {
		2761	use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
		2762	if (dims > 2) {
		2763	use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
		2764	}
		2765	}
		2766	if ((static_texture_state->target == PIPE_TEXTURE_CUBE \|\|
		2767	static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
		2768	derived_sampler_state.seamless_cube_map &&
		2769	(derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR \|\|
		2770	derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
		2771	/* theoretically possible with AoS filtering but not implemented (complex!) */
		2772	use_aos = 0;
		2773	}
		2774
		2775	if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
		2776	!use_aos && util_format_fits_8unorm(bld.format_desc)) {
		2777	debug_printf("%s: using floating point linear filtering for %s\n",
		2778	__FUNCTION__, bld.format_desc->short_name);
		2779	debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
		2780	" wraps %d wrapt %d wrapr %d\n",
		2781	derived_sampler_state.min_img_filter,
		2782	derived_sampler_state.mag_img_filter,
		2783	derived_sampler_state.min_mip_filter,
		2784	static_texture_state->target,
		2785	derived_sampler_state.seamless_cube_map,
		2786	derived_sampler_state.wrap_s,
		2787	derived_sampler_state.wrap_t,
		2788	derived_sampler_state.wrap_r);
		2789	}
		2790
		2791	lp_build_sample_common(&bld, texture_index, sampler_index,
		2792	newcoords,
		2793	derivs, lod_bias, explicit_lod,
		2794	&lod_positive, &lod_fpart,
		2795	&ilevel0, &ilevel1);
		2796
		2797	if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
		2798	/* The aos path doesn't do seamless filtering so simply add cube layer
		2799	* to face now.
		2800	*/
		2801	newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
		2802	}
		2803
		2804	/*
		2805	* we only try 8-wide sampling with soa as it appears to
		2806	* be a loss with aos with AVX (but it should work, except
		2807	* for conformance if min_filter != mag_filter if num_lods > 1).
		2808	* (It should be faster if we'd support avx2)
		2809	*/
		2810	if (num_quads == 1 \|\| !use_aos) {
		2811	if (use_aos) {
		2812	/* do sampling/filtering with fixed pt arithmetic */
		2813	lp_build_sample_aos(&bld, sampler_index,
		2814	newcoords[0], newcoords[1],
		2815	newcoords[2],
		2816	offsets, lod_positive, lod_fpart,
		2817	ilevel0, ilevel1,
		2818	texel_out);
		2819	}
		2820
		2821	else {
		2822	lp_build_sample_general(&bld, sampler_index,
		2823	op_type == LP_SAMPLER_OP_GATHER,
		2824	newcoords, offsets,
		2825	lod_positive, lod_fpart,
		2826	ilevel0, ilevel1,
		2827	texel_out);
		2828	}
		2829	}
		2830	else {
		2831	unsigned j;
		2832	struct lp_build_sample_context bld4;
		2833	struct lp_type type4 = type;
		2834	unsigned i;
		2835	LLVMValueRef texelout4[4];
		2836	LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
		2837
		2838	type4.length = 4;
		2839
		2840	/* Setup our build context */
		2841	memset(&bld4, 0, sizeof bld4);
		2842	bld4.gallivm = bld.gallivm;
		2843	bld4.context_ptr = bld.context_ptr;
		2844	bld4.static_texture_state = bld.static_texture_state;
		2845	bld4.static_sampler_state = bld.static_sampler_state;
		2846	bld4.dynamic_state = bld.dynamic_state;
		2847	bld4.format_desc = bld.format_desc;
		2848	bld4.dims = bld.dims;
		2849	bld4.row_stride_array = bld.row_stride_array;
		2850	bld4.img_stride_array = bld.img_stride_array;
		2851	bld4.base_ptr = bld.base_ptr;
		2852	bld4.mip_offsets = bld.mip_offsets;
		2853	bld4.int_size = bld.int_size;
		2854
		2855	bld4.vector_width = lp_type_width(type4);
		2856
		2857	bld4.float_type = lp_type_float(32);
		2858	bld4.int_type = lp_type_int(32);
		2859	bld4.coord_type = type4;
		2860	bld4.int_coord_type = lp_int_type(type4);
		2861	bld4.float_size_in_type = lp_type_float(32);
		2862	bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
		2863	bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
		2864	bld4.texel_type = bld.texel_type;
		2865	bld4.texel_type.length = 4;
		2866
		2867	bld4.num_mips = bld4.num_lods = 1;
		2868	if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
		2869	(gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
		2870	(static_texture_state->target == PIPE_TEXTURE_CUBE \|\|
		2871	static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
		2872	(op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
		2873	bld4.num_mips = type4.length;
		2874	bld4.num_lods = type4.length;
		2875	}
		2876	if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
		2877	(explicit_lod \|\| lod_bias \|\| derivs)) {
		2878	if ((!op_is_tex && target != PIPE_BUFFER) \|\|
		2879	(op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
		2880	bld4.num_mips = type4.length;
		2881	bld4.num_lods = type4.length;
		2882	}
		2883	else if (op_is_tex && min_img_filter != mag_img_filter) {
		2884	bld4.num_mips = 1;
		2885	bld4.num_lods = type4.length;
		2886	}
		2887	}
		2888
		2889	/* we want native vector size to be able to use our intrinsics */
		2890	bld4.lodf_type = type4;
		2891	if (bld4.num_lods != type4.length) {
		2892	bld4.lodf_type.length = 1;
		2893	}
		2894	bld4.lodi_type = lp_int_type(bld4.lodf_type);
		2895	bld4.levelf_type = type4;
		2896	if (bld4.num_mips != type4.length) {
		2897	bld4.levelf_type.length = 1;
		2898	}
		2899	bld4.leveli_type = lp_int_type(bld4.levelf_type);
		2900	bld4.float_size_type = bld4.float_size_in_type;
		2901	if (bld4.num_mips > 1) {
		2902	bld4.float_size_type.length = bld4.num_mips == type4.length ?
		2903	bld4.num_mips * bld4.float_size_in_type.length :
		2904	type4.length;
		2905	}
		2906	bld4.int_size_type = lp_int_type(bld4.float_size_type);
		2907
		2908	lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
		2909	lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
		2910	lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
		2911	lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
		2912	lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
		2913	lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
		2914	lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
		2915	lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
		2916	lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
		2917	lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
		2918	lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
		2919	lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
		2920	lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
		2921	lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
		2922
		2923	for (i = 0; i < num_quads; i++) {
		2924	LLVMValueRef s4, t4, r4;
		2925	LLVMValueRef lod_positive4, lod_fpart4 = NULL;
		2926	LLVMValueRef ilevel04, ilevel14 = NULL;
		2927	LLVMValueRef offsets4[4] = { NULL };
		2928	unsigned num_lods = bld4.num_lods;
		2929
		2930	s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
		2931	t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
		2932	r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
		2933
		2934	if (offsets[0]) {
		2935	offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
		2936	if (dims > 1) {
		2937	offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
		2938	if (dims > 2) {
		2939	offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
		2940	}
		2941	}
		2942	}
		2943	lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
		2944	ilevel04 = bld.num_mips == 1 ? ilevel0 :
		2945	lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
		2946	if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
		2947	ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
		2948	lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
		2949	}
		2950
		2951	if (use_aos) {
		2952	/* do sampling/filtering with fixed pt arithmetic */
		2953	lp_build_sample_aos(&bld4, sampler_index,
		2954	s4, t4, r4, offsets4,
		2955	lod_positive4, lod_fpart4,
		2956	ilevel04, ilevel14,
		2957	texelout4);
		2958	}
		2959
		2960	else {
		2961	/* this path is currently unreachable and hence might break easily... */
		2962	LLVMValueRef newcoords4[5];
		2963	newcoords4[0] = s4;
		2964	newcoords4[1] = t4;
		2965	newcoords4[2] = r4;
		2966	newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
		2967	newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
		2968
		2969	lp_build_sample_general(&bld4, sampler_index,
		2970	op_type == LP_SAMPLER_OP_GATHER,
		2971	newcoords4, offsets4,
		2972	lod_positive4, lod_fpart4,
		2973	ilevel04, ilevel14,
		2974	texelout4);
		2975	}
		2976	for (j = 0; j < 4; j++) {
		2977	texelouttmp[j][i] = texelout4[j];
		2978	}
		2979	}
		2980
		2981	for (j = 0; j < 4; j++) {
		2982	texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
		2983	}
		2984	}
		2985	}
		2986
		2987	if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
		2988	apply_sampler_swizzle(&bld, texel_out);
		2989	}
		2990
		2991	/*
		2992	* texel type can be a (32bit) int/uint (for pure int formats only),
		2993	* however we are expected to always return floats (storage is untyped).
		2994	*/
		2995	if (!bld.texel_type.floating) {
		2996	unsigned chan;
		2997	for (chan = 0; chan < 4; chan++) {
		2998	texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
		2999	lp_build_vec_type(gallivm, type), "");
		3000	}
		3001	}
		3002	}
		3003
		3004
		/*
		 * Compile-time switch tested by lp_build_sample_soa(): when non-zero,
		 * sampling that is not considered "simple enough" is emitted as a call
		 * to a generated texture function; when zero, sampling code is always
		 * emitted inline.
		 */
		3005	#define USE_TEX_FUNC_CALL 1
		3006
		/*
		 * Capacity of the args[] / arg_types[] arrays that
		 * lp_build_sample_soa_func() uses to assemble a texture function call.
		 */
		3007	#define LP_MAX_TEX_FUNC_ARGS 32
		3008
		3009	static inline void
		3010	get_target_info(enum pipe_texture_target target,
		3011	unsigned num_coords, unsigned num_derivs,
		3012	unsigned num_offsets, unsigned layer)
		3013	{
		3014	unsigned dims = texture_dims(target);
		3015	*num_coords = dims;
		3016	*num_offsets = dims;
		3017	*num_derivs = (target == PIPE_TEXTURE_CUBE \|\|
		3018	target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
		3019	*layer = has_layer_coord(target) ? 2: 0;
		3020	if (target == PIPE_TEXTURE_CUBE_ARRAY) {
		3021	/*
		3022	* dims doesn't include r coord for cubes - this is handled
		3023	* by layer instead, but need to fix up for cube arrays...
		3024	*/
		3025	*layer = 3;
		3026	*num_coords = 3;
		3027	}
		3028	}
		3029
		3030
		3031	/**
		3032	* Generate the function body for a texture sampling function.
		3033	*/
		3034	static void
		3035	lp_build_sample_gen_func(struct gallivm_state *gallivm,
		3036	const struct lp_static_texture_state *static_texture_state,
		3037	const struct lp_static_sampler_state *static_sampler_state,
		3038	struct lp_sampler_dynamic_state *dynamic_state,
		3039	struct lp_type type,
		3040	unsigned texture_index,
		3041	unsigned sampler_index,
		3042	LLVMValueRef function,
		3043	unsigned num_args,
		3044	unsigned sample_key)
		3045	{
		3046	LLVMBuilderRef old_builder;
		3047	LLVMBasicBlockRef block;
		3048	LLVMValueRef coords[5];
		3049	LLVMValueRef offsets[3] = { NULL };
		3050	LLVMValueRef lod = NULL;
		3051	LLVMValueRef context_ptr;
		3052	LLVMValueRef texel_out[4];
		3053	struct lp_derivatives derivs;
		3054	struct lp_derivatives *deriv_ptr = NULL;
		3055	unsigned num_param = 0;
		3056	unsigned i, num_coords, num_derivs, num_offsets, layer;
		3057	enum lp_sampler_lod_control lod_control;
		3058
		3059	lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
		3060	LP_SAMPLER_LOD_CONTROL_SHIFT;
		3061
		3062	get_target_info(static_texture_state->target,
		3063	&num_coords, &num_derivs, &num_offsets, &layer);
		3064
		3065	/* "unpack" arguments */
		3066	context_ptr = LLVMGetParam(function, num_param++);
		3067	for (i = 0; i < num_coords; i++) {
		3068	coords[i] = LLVMGetParam(function, num_param++);
		3069	}
		3070	for (i = num_coords; i < 5; i++) {
		3071	/* This is rather unfortunate... */
		3072	coords[i] = lp_build_undef(gallivm, type);
		3073	}
		3074	if (layer) {
		3075	coords[layer] = LLVMGetParam(function, num_param++);
		3076	}
		3077	if (sample_key & LP_SAMPLER_SHADOW) {
		3078	coords[4] = LLVMGetParam(function, num_param++);
		3079	}
		3080	if (sample_key & LP_SAMPLER_OFFSETS) {
		3081	for (i = 0; i < num_offsets; i++) {
		3082	offsets[i] = LLVMGetParam(function, num_param++);
		3083	}
		3084	}
		3085	if (lod_control == LP_SAMPLER_LOD_BIAS \|\|
		3086	lod_control == LP_SAMPLER_LOD_EXPLICIT) {
		3087	lod = LLVMGetParam(function, num_param++);
		3088	}
		3089	else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
		3090	for (i = 0; i < num_derivs; i++) {
		3091	derivs.ddx[i] = LLVMGetParam(function, num_param++);
		3092	derivs.ddy[i] = LLVMGetParam(function, num_param++);
		3093	}
		3094	deriv_ptr = &derivs;
		3095	}
		3096
		3097	assert(num_args == num_param);
		3098
		3099	/*
		3100	* Function body
		3101	*/
		3102
		3103	old_builder = gallivm->builder;
		3104	block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
		3105	gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
		3106	LLVMPositionBuilderAtEnd(gallivm->builder, block);
		3107
		3108	lp_build_sample_soa_code(gallivm,
		3109	static_texture_state,
		3110	static_sampler_state,
		3111	dynamic_state,
		3112	type,
		3113	sample_key,
		3114	texture_index,
		3115	sampler_index,
		3116	context_ptr,
		3117	coords,
		3118	offsets,
		3119	deriv_ptr,
		3120	lod,
		3121	texel_out);
		3122
		3123	LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
		3124
		3125	LLVMDisposeBuilder(gallivm->builder);
		3126	gallivm->builder = old_builder;
		3127
		3128	gallivm_verify_function(gallivm, function);
		3129	}
		3130
		3131
		3132	/**
		3133	* Call the matching function for texture sampling.
		3134	* If there's no match, generate a new one.
		3135	*/
		3136	static void
		3137	lp_build_sample_soa_func(struct gallivm_state *gallivm,
		3138	const struct lp_static_texture_state *static_texture_state,
		3139	const struct lp_static_sampler_state *static_sampler_state,
		3140	struct lp_sampler_dynamic_state *dynamic_state,
		3141	const struct lp_sampler_params *params)
		3142	{
		3143	LLVMBuilderRef builder = gallivm->builder;
		3144	LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
		3145	LLVMGetInsertBlock(builder)));
		3146	LLVMValueRef function, inst;
		3147	LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
		3148	LLVMBasicBlockRef bb;
		3149	LLVMValueRef tex_ret;
		3150	unsigned num_args = 0;
		3151	char func_name[64];
		3152	unsigned i, num_coords, num_derivs, num_offsets, layer;
		3153	unsigned texture_index = params->texture_index;
		3154	unsigned sampler_index = params->sampler_index;
		3155	unsigned sample_key = params->sample_key;
		3156	const LLVMValueRef *coords = params->coords;
		3157	const LLVMValueRef *offsets = params->offsets;
		3158	const struct lp_derivatives *derivs = params->derivs;
		3159	enum lp_sampler_lod_control lod_control;
		3160
		3161	lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
		3162	LP_SAMPLER_LOD_CONTROL_SHIFT;
		3163
		3164	get_target_info(static_texture_state->target,
		3165	&num_coords, &num_derivs, &num_offsets, &layer);
		3166
		3167	/*
		3168	* texture function matches are found by name.
		3169	* Thus the name has to include both the texture and sampler unit
		3170	* (which covers all static state) plus the actual texture function
		3171	* (including things like offsets, shadow coord, lod control).
		3172	* Additionally lod_property has to be included too.
		3173	*/
		3174
		3175	util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
		3176	texture_index, sampler_index, sample_key);
		3177
		3178	function = LLVMGetNamedFunction(module, func_name);
		3179
		3180	if(!function) {
		3181	LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
		3182	LLVMTypeRef ret_type;
		3183	LLVMTypeRef function_type;
		3184	LLVMTypeRef val_type[4];
		3185	unsigned num_param = 0;
		3186
		3187	/*
		3188	* Generate the function prototype.
		3189	*/
		3190
		3191	arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
		3192	for (i = 0; i < num_coords; i++) {
		3193	arg_types[num_param++] = LLVMTypeOf(coords[0]);
		3194	assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
		3195	}
		3196	if (layer) {
		3197	arg_types[num_param++] = LLVMTypeOf(coords[layer]);
		3198	assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
		3199	}
		3200	if (sample_key & LP_SAMPLER_SHADOW) {
		3201	arg_types[num_param++] = LLVMTypeOf(coords[0]);
		3202	}
		3203	if (sample_key & LP_SAMPLER_OFFSETS) {
		3204	for (i = 0; i < num_offsets; i++) {
		3205	arg_types[num_param++] = LLVMTypeOf(offsets[0]);
		3206	assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
		3207	}
		3208	}
		3209	if (lod_control == LP_SAMPLER_LOD_BIAS \|\|
		3210	lod_control == LP_SAMPLER_LOD_EXPLICIT) {
		3211	arg_types[num_param++] = LLVMTypeOf(params->lod);
		3212	}
		3213	else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
		3214	for (i = 0; i < num_derivs; i++) {
		3215	arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
		3216	arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
		3217	assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
		3218	assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
		3219	}
		3220	}
		3221
		3222	val_type[0] = val_type[1] = val_type[2] = val_type[3] =
		3223	lp_build_vec_type(gallivm, params->type);
		3224	ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
		3225	function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
		3226	function = LLVMAddFunction(module, func_name, function_type);
		3227
		3228	for (i = 0; i < num_param; ++i) {
		3229	if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
		3230	LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
		3231	}
		3232	}
		3233
		3234	LLVMSetFunctionCallConv(function, LLVMFastCallConv);
		3235	LLVMSetLinkage(function, LLVMPrivateLinkage);
		3236
		3237	lp_build_sample_gen_func(gallivm,
		3238	static_texture_state,
		3239	static_sampler_state,
		3240	dynamic_state,
		3241	params->type,
		3242	texture_index,
		3243	sampler_index,
		3244	function,
		3245	num_param,
		3246	sample_key);
		3247	}
		3248
		3249	num_args = 0;
		3250	args[num_args++] = params->context_ptr;
		3251	for (i = 0; i < num_coords; i++) {
		3252	args[num_args++] = coords[i];
		3253	}
		3254	if (layer) {
		3255	args[num_args++] = coords[layer];
		3256	}
		3257	if (sample_key & LP_SAMPLER_SHADOW) {
		3258	args[num_args++] = coords[4];
		3259	}
		3260	if (sample_key & LP_SAMPLER_OFFSETS) {
		3261	for (i = 0; i < num_offsets; i++) {
		3262	args[num_args++] = offsets[i];
		3263	}
		3264	}
		3265	if (lod_control == LP_SAMPLER_LOD_BIAS \|\|
		3266	lod_control == LP_SAMPLER_LOD_EXPLICIT) {
		3267	args[num_args++] = params->lod;
		3268	}
		3269	else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
		3270	for (i = 0; i < num_derivs; i++) {
		3271	args[num_args++] = derivs->ddx[i];
		3272	args[num_args++] = derivs->ddy[i];
		3273	}
		3274	}
		3275
		3276	assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
		3277
		3278	tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
		3279	bb = LLVMGetInsertBlock(builder);
		3280	inst = LLVMGetLastInstruction(bb);
		3281	LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
		3282
		3283	for (i = 0; i < 4; i++) {
		3284	params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
		3285	}
		3286	}
		3287
		3288
		3289	/**
		3290	* Build texture sampling code.
		3291	* Either via a function call or inline it directly.
		3292	*/
		3293	void
		3294	lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
		3295	const struct lp_static_sampler_state *static_sampler_state,
		3296	struct lp_sampler_dynamic_state *dynamic_state,
		3297	struct gallivm_state *gallivm,
		3298	const struct lp_sampler_params *params)
		3299	{
		3300	boolean use_tex_func = FALSE;
		3301
		3302	/*
		3303	* Do not use a function call if the sampling is "simple enough".
		3304	* We define this by
		3305	* a) format
		3306	* b) no mips (either one level only or no mip filter)
		3307	* No mips will definitely make the code smaller, though
		3308	* the format requirement is a bit iffy - there's some (SoA) formats
		3309	* which definitely generate less code. This does happen to catch
		3310	* some important cases though which are hurt quite a bit by using
		3311	* a call (though not really because of the call overhead but because
		3312	* they are reusing the same texture unit with some of the same
		3313	* parameters).
		3314	* Ideally we'd let llvm recognize this stuff by doing IPO passes.
		3315	*/
		3316
		3317	if (USE_TEX_FUNC_CALL) {
		3318	const struct util_format_description *format_desc;
		3319	boolean simple_format;
		3320	boolean simple_tex;
		3321	enum lp_sampler_op_type op_type;
		3322	format_desc = util_format_description(static_texture_state->format);
		3323	simple_format = !format_desc \|\|
		3324	(util_format_is_rgba8_variant(format_desc) &&
		3325	format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
		3326
		3327	op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
		3328	LP_SAMPLER_OP_TYPE_SHIFT;
		3329	simple_tex =
		3330	op_type != LP_SAMPLER_OP_TEXTURE \|\|
		3331	((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE \|\|
		3332	static_texture_state->level_zero_only == TRUE) &&
		3333	static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
		3334
		3335	use_tex_func = format_desc && !(simple_format && simple_tex);
		3336	}
		3337
		3338	if (use_tex_func) {
		3339	lp_build_sample_soa_func(gallivm,
		3340	static_texture_state,
		3341	static_sampler_state,
		3342	dynamic_state,
		3343	params);
		3344	}
		3345	else {
		3346	lp_build_sample_soa_code(gallivm,
		3347	static_texture_state,
		3348	static_sampler_state,
		3349	dynamic_state,
		3350	params->type,
		3351	params->sample_key,
		3352	params->texture_index,
		3353	params->sampler_index,
		3354	params->context_ptr,
		3355	params->coords,
		3356	params->offsets,
		3357	params->derivs,
		3358	params->lod,
		3359	params->texel);
		3360	}
		3361	}
		3362
		3363
		3364	void
		3365	lp_build_size_query_soa(struct gallivm_state *gallivm,
		3366	const struct lp_static_texture_state *static_state,
		3367	struct lp_sampler_dynamic_state *dynamic_state,
		3368	struct lp_type int_type,
		3369	unsigned texture_unit,
		3370	unsigned target,
		3371	LLVMValueRef context_ptr,
		3372	boolean is_sviewinfo,
		3373	enum lp_sampler_lod_property lod_property,
		3374	LLVMValueRef explicit_lod,
		3375	LLVMValueRef *sizes_out)
		3376	{
		3377	LLVMValueRef lod, level, size;
		3378	LLVMValueRef first_level = NULL;
		3379	int dims, i;
		3380	boolean has_array;
		3381	unsigned num_lods = 1;
		3382	struct lp_build_context bld_int_vec4;
		3383
		3384	if (static_state->format == PIPE_FORMAT_NONE) {
		3385	/*
		3386	* If there's nothing bound, format is NONE, and we must return
		3387	* all zero as mandated by d3d10 in this case.
		3388	*/
		3389	unsigned chan;
		3390	LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F);
		3391	for (chan = 0; chan < 4; chan++) {
		3392	sizes_out[chan] = zero;
		3393	}
		3394	return;
		3395	}
		3396
		3397	/*
		3398	* Do some sanity verification about bound texture and shader dcl target.
		3399	* Not entirely sure what's possible but assume array/non-array
		3400	* always compatible (probably not ok for OpenGL but d3d10 has no
		3401	* distinction of arrays at the resource level).
		3402	* Everything else looks bogus (though not entirely sure about rect/2d).
		3403	* Currently disabled because it causes assertion failures if there's
		3404	* nothing bound (or rather a dummy texture, not that this case would
		3405	* return the right values).
		3406	*/
		3407	if (0 && static_state->target != target) {
		3408	if (static_state->target == PIPE_TEXTURE_1D)
		3409	assert(target == PIPE_TEXTURE_1D_ARRAY);
		3410	else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
		3411	assert(target == PIPE_TEXTURE_1D);
		3412	else if (static_state->target == PIPE_TEXTURE_2D)
		3413	assert(target == PIPE_TEXTURE_2D_ARRAY);
		3414	else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
		3415	assert(target == PIPE_TEXTURE_2D);
		3416	else if (static_state->target == PIPE_TEXTURE_CUBE)
		3417	assert(target == PIPE_TEXTURE_CUBE_ARRAY);
		3418	else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
		3419	assert(target == PIPE_TEXTURE_CUBE);
		3420	else
		3421	assert(0);
		3422	}
		3423
		3424	dims = texture_dims(target);
		3425
		3426	switch (target) {
		3427	case PIPE_TEXTURE_1D_ARRAY:
		3428	case PIPE_TEXTURE_2D_ARRAY:
		3429	case PIPE_TEXTURE_CUBE_ARRAY:
		3430	has_array = TRUE;
		3431	break;
		3432	default:
		3433	has_array = FALSE;
		3434	break;
		3435	}
		3436
		3437	assert(!int_type.floating);
		3438
		3439	lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
		3440
		3441	if (explicit_lod) {
		3442	/* FIXME: this needs to honor per-element lod */
		3443	lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod,
		3444	lp_build_const_int32(gallivm, 0), "");
		3445	first_level = dynamic_state->first_level(dynamic_state, gallivm,
		3446	context_ptr, texture_unit);
		3447	level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
		3448	lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
		3449	} else {
		3450	lod = bld_int_vec4.zero;
		3451	}
		3452
		3453	size = bld_int_vec4.undef;
		3454
		3455	size = LLVMBuildInsertElement(gallivm->builder, size,
		3456	dynamic_state->width(dynamic_state, gallivm,
		3457	context_ptr, texture_unit),
		3458	lp_build_const_int32(gallivm, 0), "");
		3459
		3460	if (dims >= 2) {
		3461	size = LLVMBuildInsertElement(gallivm->builder, size,
		3462	dynamic_state->height(dynamic_state, gallivm,
		3463	context_ptr, texture_unit),
		3464	lp_build_const_int32(gallivm, 1), "");
		3465	}
		3466
		3467	if (dims >= 3) {
		3468	size = LLVMBuildInsertElement(gallivm->builder, size,
		3469	dynamic_state->depth(dynamic_state, gallivm,
		3470	context_ptr, texture_unit),
		3471	lp_build_const_int32(gallivm, 2), "");
		3472	}
		3473
		3474	size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
		3475
		3476	if (has_array) {
		3477	LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
		3478	context_ptr, texture_unit);
		3479	if (target == PIPE_TEXTURE_CUBE_ARRAY) {
		3480	/*
		3481	* It looks like GL wants number of cubes, d3d10.1 has it undefined?
		3482	* Could avoid this by passing in number of cubes instead of total
		3483	* number of layers (might make things easier elsewhere too).
		3484	*/
		3485	LLVMValueRef six = lp_build_const_int32(gallivm, 6);
		3486	layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
		3487	}
		3488	size = LLVMBuildInsertElement(gallivm->builder, size, layers,
		3489	lp_build_const_int32(gallivm, dims), "");
		3490	}
		3491
		3492	/*
		3493	* d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
		3494	* if level is out of bounds (note this can't cover unbound texture
		3495	* here, which also requires returning zero).
		3496	*/
		3497	if (explicit_lod && is_sviewinfo) {
		3498	LLVMValueRef last_level, out, out1;
		3499	struct lp_build_context leveli_bld;
		3500
		3501	/* everything is scalar for now */
		3502	lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
		3503	last_level = dynamic_state->last_level(dynamic_state, gallivm,
		3504	context_ptr, texture_unit);
		3505
		3506	out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
		3507	out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
		3508	out = lp_build_or(&leveli_bld, out, out1);
		3509	if (num_lods == 1) {
		3510	out = lp_build_broadcast_scalar(&bld_int_vec4, out);
		3511	}
		3512	else {
		3513	/* TODO */
		3514	assert(0);
		3515	}
		3516	size = lp_build_andnot(&bld_int_vec4, size, out);
		3517	}
		3518	for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
		3519	sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
		3520	size,
		3521	lp_build_const_int32(gallivm, i));
		3522	}
		3523	if (is_sviewinfo) {
		3524	for (; i < 4; i++) {
		3525	sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
		3526	}
		3527	}
		3528
		3529	/*
		3530	* if there's no explicit_lod (buffers, rects) queries requiring nr of
		3531	* mips would be illegal.
		3532	*/
		3533	if (is_sviewinfo && explicit_lod) {
		3534	struct lp_build_context bld_int_scalar;
		3535	LLVMValueRef num_levels;
		3536	lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
		3537
		3538	if (static_state->level_zero_only) {
		3539	num_levels = bld_int_scalar.one;
		3540	}
		3541	else {
		3542	LLVMValueRef last_level;
		3543
		3544	last_level = dynamic_state->last_level(dynamic_state, gallivm,
		3545	context_ptr, texture_unit);
		3546	num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
		3547	num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
		3548	}
		3549	sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
		3550	num_levels);
		3551	}
		3552	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c – Rev 5571