/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu
 *    Eric Anholt
 *    Carl Worth
 *    Keith Packard
 *    Chris Wilson
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders: we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define MAX_FLUSH_VERTICES 6

#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
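
/* GEN4_GRF_BLOCKS() rounds a GRF register count up to whole 16-register
 * blocks and encodes it as "blocks minus one". Worked examples of the
 * arithmetic (nothing here beyond the macro itself):
 *   GEN4_GRF_BLOCKS(16) == (16 + 15) / 16 - 1 == 0
 *   GEN4_GRF_BLOCKS(32) == (32 + 15) / 16 - 1 == 1
 */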

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES        0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES        32

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES        0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES        64
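
/* A quick sketch of how gen4_emit_urb() below turns this partitioning into
 * URB_FENCE values: each section's fence is the running sum of
 * ENTRIES * ENTRY_SIZE, so with the numbers above the regions end at
 *   VS:     0 + 32*1 =  32
 *   GS:    32 +  0*0 =  32
 *   CLIP:  32 +  0*0 =  32
 *   SF:    32 + 64*2 = 160
 *   CS:   160 +  0*1 = 160
 * i.e. only the VS and SF sections receive any URB space in this scheme.
 */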

/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
	[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
	[kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
	const void *data;
	unsigned int size;
	bool has_mask;
} wm_kernels[] = {
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

	KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL

static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen4_blend_op[] = {
	/* Clear */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
	/* Over */	{1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
	/* In */	{0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
	(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
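
/* Both macros index pre-baked, 64-byte-aligned state blocks: BLEND_OFFSET()
 * treats the (src, dst) blend-factor pair as a row/column index into a flat
 * array of cc (blend) state, and SAMPLER_OFFSET() does the same for the
 * five-dimensional (src filter, src extend, mask filter, mask extend,
 * kernel) space of wm state. One property worth noting, derivable from the
 * macro alone: adjacent kernels for identical sampler settings are exactly
 * 64 bytes apart, i.e.
 *   SAMPLER_OFFSET(sf, se, mf, me, k + 1) -
 *   SAMPLER_OFFSET(sf, se, mf, me, k) == 64
 */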

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
	return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}

static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen4_blend_op[op].src_alpha)
				base = WM_KERNEL_MASKSA;
			else
				base = WM_KERNEL_MASKCA;
		} else
			base = WM_KERNEL_MASK;
	} else
		base = WM_KERNEL;

	return base + !is_affine;
}
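
/* Kernel selection sketch: for an affine Over with a component-alpha mask,
 * gen4_blend_op[PictOpOver].src_alpha is set, so this picks WM_KERNEL_MASKSA;
 * the projective variant is base + 1, which relies on each WM_KERNEL_*_P
 * enum directly following its affine counterpart (the same ordering the
 * wm_kernels[] table above assumes).
 */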

static bool gen4_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen4_render_state *state = &sna->render_state.gen4;

	if (!op->need_magic_ca_pass)
		return false;

	assert(sna->render.vertex_index > sna->render.vertex_start);

	DBG(("%s: CA fixup\n", __FUNCTION__));
	assert(op->mask.bo != NULL);
	assert(op->has_component_alpha);

	gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
				     gen4_choose_composite_kernel(PictOpAdd,
								  true, true, op->is_affine));

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static uint32_t gen4_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = GEN4_BLENDFACTOR_ONE;  //gen4_blend_op[op].src_blend;
	dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen4_blend_op[op].dst_blend;
#if 0
	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
	 * it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN4_BLENDFACTOR_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ONE;
		else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen4_blend_op[op].src_alpha) {
		if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
	}
#endif
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}
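
/* Note on the hard-coded factors above: (ONE, INV_SRC_ALPHA) is the blend
 * pair for premultiplied PictOpOver, so with the generic lookup disabled
 * under #if 0 this port effectively blends every composite as Over. A
 * hypothetical re-enabling would restore the table lookup, e.g.:
 *   src = gen4_blend_op[op].src_blend;
 *   dst = gen4_blend_op[op].dst_blend;
 * together with the dst-alpha and component-alpha fixups shown above.
 */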

static uint32_t gen4_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields of a surface state buffer for the given
 * picture.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
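
/* Binding-cache sketch: the key passed to kgem_bo_get_binding() is
 * format | is_dst << 31, so the same bo bound both as a sampler source and
 * as the render target gets two distinct cached surface-state entries, and
 * a repeat bind is satisfied without writing a new gen4_surface_state_padded
 * block (only re-marking the bo dirty when it is the destination).
 */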

static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
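
/* A note on the fast path above: last_primitive records nbatch just after a
 * 3DPRIMITIVE was emitted. If nothing else has been written to the batch
 * since, the previous packet can simply be extended instead of starting a
 * new one; nbatch - 5 points back at its vertex-count dword (the five body
 * dwords being count, start index, instance count, start instance and index
 * offset), which gen4_vertex_flush() fills in later.
 */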

static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	unsigned int id = 1 << op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7x pipelined pointers + 6x primitive + 1x flush */
	ndwords = op->need_magic_ca_pass ? 20 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	ndwords += 2*FORCE_FLUSH;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem,
			      2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (op->need_magic_ca_pass && sna->render.vbo)
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);
#if FORCE_FLUSH
	rem = sna->render.vertex_offset;
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
		rem = sna->kgem.nbatch - 5;
	if (rem) {
		rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
		if (rem <= 0) {
			if (sna->render.vertex_offset) {
				gen4_vertex_flush(sna);
				if (gen4_magic_ca_pass(sna, op))
					gen4_emit_pipelined_pointers(sna, op, op->op,
								     op->u.gen4.wm_kernel);
			}
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
			rem = MAX_FLUSH_VERTICES;
		}
	} else
		rem = MAX_FLUSH_VERTICES;
	if (want > rem)
		want = rem;
#endif

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
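
/* FORCE_FLUSH arithmetic, worked through: vertex_index - vertex_start is
 * the number of vertices in the open RECTLIST, and each rectangle costs
 * three vertices, so (index - start) / 3 is the rectangle count. With
 * MAX_FLUSH_VERTICES == 6 the workaround therefore inserts an MI_FLUSH
 * after at most six rectangles (despite the name, the limit here is
 * counted in rectangles, not vertices), i.e. every 18 emitted vertices.
 */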

static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_start, urb_vs_size;
	int urb_gs_start, urb_gs_size;
	int urb_clip_start, urb_clip_size;
	int urb_sf_start, urb_sf_size;
	int urb_cs_start, urb_cs_size;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_start = 0;
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_start = urb_vs_start + urb_vs_size;
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_clip_start = urb_gs_start + urb_gs_size;
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
	urb_sf_start = urb_clip_start + urb_clip_size;
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_start = urb_sf_start + urb_sf_size;
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}

static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}

static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}
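
/* Realignment example: when the vertex size changes, vertex_index is
 * rounded up so that the next vertex starts on a whole-vertex boundary in
 * the new layout. With vertex_used == 10 floats and floats_per_vertex == 3:
 *   vertex_index = (10 + 3 - 1) / 3 = 4
 *   vertex_used  = 4 * 3           = 12
 * leaving a two-float hole rather than splitting a vertex across the seam.
 */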

static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}

static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}
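
/* Layout example for the video path (a sketch based on how this file sets
 * ve_id): gen4_render_video() uses ve_id == 2 and floats_per_vertex == 3,
 * i.e. per vertex one dword of packed 16-bit (x, y) via OUT_VERTEX() plus
 * two floats (u, v) selected by the "id & 3 == 2" case above; and since
 * id >> 2 == 0, the second texture channel is filled with constants.
 */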

static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	assert(op->dst.bo->exec);

	flush = wm_binding_table & 1;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		OUT_BATCH(MI_FLUSH);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	gen4_emit_binding_table(sna, wm_binding_table & ~1);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}

static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	gen4_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}

#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen4_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen4_emit_state(sna, op, offset | dirty);
}
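
/* Planar-source sketch: for a planar fourcc the frame is bound as six
 * R8_UNORM surfaces, Y at full resolution (slots 0-1), then V and U at
 * half width/height (slots 2-3 and 4-5), with each plane appearing twice,
 * presumably so the planar WM kernel can read every channel from a fixed
 * sampler index. Packed YUY2/UYVY instead binds a single YCRCB surface.
 */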

static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	BoxPtr box;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel =
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
	}

	gen4_video_bind_surfaces(sna, &tmp);
	gen4_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			BoxRec r;

			r.x1 = box->x1 + pix_xoff;
			r.x2 = box->x2 + pix_xoff;
			r.y1 = box->y1 + pix_yoff;
			r.y2 = box->y2 + pix_yoff;

			OUT_VERTEX(r.x2, r.y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
				sna_damage_add_box(&priv->gpu_damage, &r);
				sna_damage_subtract_box(&priv->cpu_damage, &r);
			}
			box++;
		} while (--n);
	} while (nbox);
	gen4_vertex_flush(sna);

	return true;
}
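
/* Texture-coordinate check for the mapping above: u(x) = x * src_scale_x +
 * src_offset_x with src_scale_x = (src_width / dst_width) / frame->width.
 * At the left edge, x == dstRegion->extents.x1, the scale term cancels and
 * u == frame->src.x1 / frame->width; at the right edge, x == extents.x2,
 * u == frame->src.x2 / frame->width. That is, the destination region spans
 * exactly the normalized source window.
 */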
1381
 
1382
static int
1383
gen4_composite_picture(struct sna *sna,
1384
		       PicturePtr picture,
1385
		       struct sna_composite_channel *channel,
1386
		       int x, int y,
1387
		       int w, int h,
1388
		       int dst_x, int dst_y,
1389
		       bool precise)
1390
{
1391
	PixmapPtr pixmap;
1392
	uint32_t color;
1393
	int16_t dx, dy;
1394
 
1395
	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1396
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
1397
 
1398
	channel->is_solid = false;
1399
	channel->card_format = -1;
1400
 
1401
	if (sna_picture_is_solid(picture, &color))
1402
		return gen4_channel_init_solid(sna, channel, color);
1403
 
1404
	if (picture->pDrawable == NULL) {
1405
		int ret;
1406
 
1407
		if (picture->pSourcePict->type == SourcePictTypeLinear)
1408
			return gen4_channel_init_linear(sna, picture, channel,
1409
							x, y,
1410
							w, h,
1411
							dst_x, dst_y);
1412
 
1413
		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
1414
		ret = -1;
1415
		if (!precise)
1416
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1417
								      x, y, w, h, dst_x, dst_y);
1418
		if (ret == -1)
1419
			ret = sna_render_picture_fixup(sna, picture, channel,
1420
						       x, y, w, h, dst_x, dst_y);
1421
		return ret;
1422
	}
1423
 
1424
	if (picture->alphaMap) {
1425
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
1426
		return sna_render_picture_fixup(sna, picture, channel,
1427
						x, y, w, h, dst_x, dst_y);
1428
	}
1429
 
1430
	if (!gen4_check_repeat(picture)) {
1431
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
1432
		return sna_render_picture_fixup(sna, picture, channel,
1433
						x, y, w, h, dst_x, dst_y);
1434
	}
1435
 
1436
	if (!gen4_check_filter(picture)) {
1437
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
1438
		return sna_render_picture_fixup(sna, picture, channel,
1439
						x, y, w, h, dst_x, dst_y);
1440
	}
1441
 
1442
	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1443
	channel->filter = picture->filter;
1444
 
1445
	pixmap = get_drawable_pixmap(picture->pDrawable);
1446
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
1447
 
1448
	x += dx + picture->pDrawable->x;
1449
	y += dy + picture->pDrawable->y;
1450
 
1451
	channel->is_affine = sna_transform_is_affine(picture->transform);
1452
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
1453
		DBG(("%s: integer translation (%d, %d), removing\n",
1454
		     __FUNCTION__, dx, dy));
1455
		x += dx;
1456
		y += dy;
1457
		channel->transform = NULL;
1458
		channel->filter = PictFilterNearest;
1459
	} else
1460
		channel->transform = picture->transform;
1461
 
1462
	channel->pict_format = picture->format;
1463
	channel->card_format = gen4_get_card_format(picture->format);
1464
	if (channel->card_format == -1)
1465
		return sna_render_picture_convert(sna, picture, channel, pixmap,
1466
						  x, y, w, h, dst_x, dst_y,
1467
						  false);
1468
 
1469
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
1470
		return sna_render_picture_extract(sna, picture, channel,
1471
						  x, y, w, h, dst_x, dst_y);
1472
 
1473
	return sna_render_pixmap_bo(sna, channel, pixmap,
1474
				    x, y, w, h, dst_x, dst_y);
1475
}
1476
 
1477
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
1478
{
1479
	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
1480
	     __FUNCTION__,
1481
	     channel->repeat, gen4_repeat(channel->repeat),
1482
	     channel->filter, gen4_repeat(channel->filter)));
1483
	channel->repeat = gen4_repeat(channel->repeat);
1484
	channel->filter = gen4_filter(channel->filter);
1485
	if (channel->card_format == (unsigned)-1)
1486
		channel->card_format = gen4_get_card_format(channel->pict_format);
1487
}
1488
#endif
1489
 
3291 Serge 1490
static void
1491
gen4_render_composite_done(struct sna *sna,
1492
			   const struct sna_composite_op *op)
1493
{
1494
	DBG(("%s()\n", __FUNCTION__));
1495
 
1496
	if (sna->render.vertex_offset) {
1497
		gen4_vertex_flush(sna);
1498
		gen4_magic_ca_pass(sna, op);
1499
	}
1500
 
1501
}
1502
 
4251 Serge 1503
#if 0
1504
static bool
1505
gen4_composite_set_target(struct sna *sna,
1506
			  struct sna_composite_op *op,
1507
			  PicturePtr dst,
1508
			  int x, int y, int w, int h,
1509
			  bool partial)
1510
{
1511
	BoxRec box;
3291 Serge 1512
 
4251 Serge 1513
	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1514
	op->dst.width  = op->dst.pixmap->drawable.width;
1515
	op->dst.height = op->dst.pixmap->drawable.height;
1516
	op->dst.format = dst->format;
1517
	if (w && h) {
1518
		box.x1 = x;
1519
		box.y1 = y;
1520
		box.x2 = x + w;
1521
		box.y2 = y + h;
1522
	} else
1523
		sna_render_picture_extents(dst, &box);
3291 Serge 1524
 
4251 Serge 1525
	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
1526
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
1527
					  &box, &op->damage);
1528
	if (op->dst.bo == NULL)
1529
		return false;
3291 Serge 1530
 
4251 Serge 1531
	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1532
			    &op->dst.x, &op->dst.y);
3291 Serge 1533
 
4251 Serge 1534
	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1535
	     __FUNCTION__,
1536
	     op->dst.pixmap, (int)op->dst.format,
1537
	     op->dst.width, op->dst.height,
1538
	     op->dst.bo->pitch,
1539
	     op->dst.x, op->dst.y,
1540
	     op->damage ? *op->damage : (void *)-1));
3291 Serge 1541
 
4251 Serge 1542
	assert(op->dst.bo->proxy == NULL);
3291 Serge 1543
 
4251 Serge 1544
	if (too_large(op->dst.width, op->dst.height) &&
1545
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1546
		return false;
3291 Serge 1547
 
4251 Serge 1548
	return true;
1549
}
3291 Serge 1550
 
4251 Serge 1551
static bool
1552
try_blt(struct sna *sna,
1553
	PicturePtr dst, PicturePtr src,
1554
	int width, int height)
1555
{
1556
	if (sna->kgem.mode != KGEM_RENDER) {
1557
		DBG(("%s: already performing BLT\n", __FUNCTION__));
1558
		return true;
1559
	}
3291 Serge 1560
 
4251 Serge 1561
	if (too_large(width, height)) {
1562
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
1563
		     __FUNCTION__, width, height));
1564
		return true;
1565
	}
3291 Serge 1566
 
4251 Serge 1567
	if (too_large(dst->pDrawable->width, dst->pDrawable->height))
1568
		return true;
3291 Serge 1569
 
4251 Serge 1570
	/* The blitter is much faster for solids */
1571
	if (sna_picture_is_solid(src, NULL))
1572
		return true;
3291 Serge 1573
 
4251 Serge 1574
	/* is the source picture only in cpu memory e.g. a shm pixmap? */
1575
	return picture_is_cpu(sna, src);
1576
}
3291 Serge 1577
 
4251 Serge 1578
static bool
1579
check_gradient(PicturePtr picture, bool precise)
1580
{
1581
	switch (picture->pSourcePict->type) {
1582
	case SourcePictTypeSolidFill:
1583
	case SourcePictTypeLinear:
1584
		return false;
1585
	default:
1586
		return precise;
1587
	}
1588
}
3291 Serge 1589
 
4251 Serge 1590
static bool
1591
has_alphamap(PicturePtr p)
1592
{
1593
	return p->alphaMap != NULL;
1594
}
3291 Serge 1595
 
4251 Serge 1596
static bool
1597
need_upload(struct sna *sna, PicturePtr p)
1598
{
1599
	return p->pDrawable && untransformed(p) &&
1600
		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
1601
}
3291 Serge 1602
 
4251 Serge 1603
static bool
1604
source_is_busy(PixmapPtr pixmap)
1605
{
1606
	struct sna_pixmap *priv = sna_pixmap(pixmap);
1607
	if (priv == NULL)
1608
		return false;
3291 Serge 1609
 
4251 Serge 1610
	if (priv->clear)
1611
		return false;
3291 Serge 1612
 
4251 Serge 1613
	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1614
		return true;
3291 Serge 1615
 
4251 Serge 1616
	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1617
		return true;
3291 Serge 1618
 
4251 Serge 1619
	return priv->gpu_damage && !priv->cpu_damage;
1620
}
3291 Serge 1621
 
4251 Serge 1622
static bool
1623
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
1624
{
1625
	if (sna_picture_is_solid(p, NULL))
1626
		return false;
3291 Serge 1627
 
4251 Serge 1628
	if (p->pSourcePict)
1629
		return check_gradient(p, precise);
3291 Serge 1630
 
4251 Serge 1631
	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
1632
		return true;
3291 Serge 1633
 
4251 Serge 1634
	/* soft errors: perfer to upload/compute rather than readback */
1635
	if (pixmap && source_is_busy(pixmap))
1636
		return false;
3291 Serge 1637
 
4251 Serge 1638
	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
1639
}
3291 Serge 1640
 
4251 Serge 1641
static bool
gen4_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen4_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the CPU.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However, if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}

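/* Where possible, let the mask channel alias the source channel so
 * that the texture is bound and sampled only once: either the mask is
 * literally the source, a solid colour, or the same drawable with a
 * compatible transform, repeat, filter and format.
 */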
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen4_check_repeat(mask))
		return false;

	if (!gen4_check_filter(mask))
		return false;

	if (!gen4_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen4_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen4_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

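/* The main composite entry point: try the BLT shortcut, check for a
 * CPU fallback, tile oversized operations, then prepare the target,
 * source and mask channels before picking a WM kernel and vertex
 * emitter and binding the surface state.
 */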
static bool
gen4_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	if (op >= ARRAY_SIZE(gen4_blend_op))
		return false;

	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen4_composite_fallback(sna, src, mask, dst))
		return false;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (!gen4_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       op > PictOpSrc || dst->pCompositeClip->data)) {
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
		return false;
	}

	tmp->op = op;
	switch (gen4_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: failed to prepare source\n", __FUNCTION__));
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		if (mask == NULL &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen4_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen4_blend_op[op].src_alpha &&
			    (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s -- fallback: unhandled component alpha blend\n",
					     __FUNCTION__));

					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen4_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: failed to prepare mask\n", __FUNCTION__));
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen4_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	tmp->u.gen4.wm_kernel =
		gen4_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->box   = gen4_render_composite_box;
	tmp->boxes = gen4_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
		tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
	}
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
	}

	gen4_bind_surfaces(sna, tmp);
	gen4_align_vertex(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}

#endif

static void
gen4_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
discard_vbo(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}

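/* kgem callbacks for recycling the vertex buffer: on retire we can
 * simply rewind an idle vbo, while on expire an unused vbo is dropped
 * and vertices fall back to the embedded vertex_data array.
 */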
static void
gen4_render_retire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}

static void
gen4_render_expire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}
}

static void gen4_render_reset(struct sna *sna)
{
	sna->render_state.gen4.needs_invariant = true;
	sna->render_state.gen4.needs_urb = true;
	sna->render_state.gen4.ve_id = -1;
	sna->render_state.gen4.last_primitive = -1;
	sna->render_state.gen4.last_pipelined_pointers = -1;

	sna->render_state.gen4.drawrect_offset = -1;
	sna->render_state.gen4.drawrect_limit = -1;
	sna->render_state.gen4.surface_table = -1;

	if (sna->render.vbo &&
	    !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}

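/* Fixed-function setup: the VS unit is programmed as a disabled
 * passthrough, so vertex data is simply copied through the URB for
 * the SF unit to consume.
 */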
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return sna_static_stream_offsetof(stream, vs);
}

static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
				     uint32_t kernel)
{
	struct gen4_sf_unit_state *sf;

	sf = sna_static_stream_map(stream, sizeof(*sf), 32);

	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer = kernel >> 6;
	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;
	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->sf5.viewport_transform = false;	/* skip viewport */
	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf7.trifan_pv = 2;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return sna_static_stream_offsetof(stream, sf);
}

static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *sampler_state;

	sampler_state = sna_static_stream_map(stream,
					      sizeof(struct gen4_sampler_state) * 2,
					      32);
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, sampler_state);
}

static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;

	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;

	wm->thread3.urb_entry_read_offset = 0;
	wm->thread3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	if (has_mask) {
		wm->thread1.binding_table_entry_count = 3;
		wm->thread3.urb_entry_read_length = 4;
	} else {
		wm->thread1.binding_table_entry_count = 2;
		wm->thread3.urb_entry_read_length = 2;
	}
}

static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *ptr, *base;
	int i, j;

	base = ptr =
		sna_static_stream_map(stream,
				      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				      64);

	for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
			struct gen4_cc_unit_state *state =
				(struct gen4_cc_unit_state *)ptr;

			state->cc3.blend_enable =
				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc;	/* COPY */
			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;

			/* Fill in alpha blend factors same as color, for the future. */
			state->cc5.ia_src_blend_factor = i;
			state->cc5.ia_dest_blend_factor = j;

			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = i;
			state->cc6.dest_blend_factor = j;

			ptr += 64;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

static bool gen4_render_setup(struct sna *sna)
{
	struct gen4_render_state *state = &sna->render_state.gen4;
	struct sna_static_stream general;
	struct gen4_wm_unit_state_padded *wm_state;
	uint32_t sf, wm[KERNEL_COUNT];
	int i, j, k, l, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		}
	}

	state->vs = gen4_create_vs_unit_state(&general);
	state->sf = gen4_create_sf_state(&general, sf);

	/* One WM unit descriptor per (src filter, src extend, mask filter,
	 * mask extend, kernel) combination, laid out so the runtime lookup
	 * is a direct index into the table.
	 */
	wm_state = sna_static_stream_map(&general,
					 sizeof(*wm_state) * KERNEL_COUNT *
					 FILTER_COUNT * EXTEND_COUNT *
					 FILTER_COUNT * EXTEND_COUNT,
					 64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					uint32_t sampler_state;

					sampler_state =
						gen4_create_sampler_state(&general,
									  i, j,
									  k, l);

					for (m = 0; m < KERNEL_COUNT; m++) {
						gen4_init_wm_state(&wm_state->state,
								   sna->kgem.gen,
								   wm_kernels[m].has_mask,
								   wm[m], sampler_state);
						wm_state++;
					}
				}
			}
		}
	}

	state->cc = gen4_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

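/* In this KolibriOS port only the texture-blit entry point and the
 * housekeeping hooks are wired up; the composite/copy/fill paths
 * below remain compiled out under "#if 0".
 */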
const char *gen4_render_init(struct sna *sna, const char *backend)
{
	if (!gen4_render_setup(sna))
		return backend;

	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if 0
#if !NO_COMPOSITE
	sna->render.composite = gen4_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen4_check_composite_spans;
	sna->render.composite_spans = gen4_render_composite_spans;
	if (0)
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif

#if !NO_VIDEO
	sna->render.video = gen4_render_video;
#endif

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen4_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen4_render_fill_one;
#endif

#endif

	sna->render.blit_tex = gen4_blit_tex;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}

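/* KolibriOS-specific entry point: composite a texture through an a8
 * alpha mask with PictOpSrc. When "scale" is set, the source
 * coordinates are normalised against the requested width/height,
 * e.g. a 256x128 blit uses a scale of (1/256.f, 1/128.f); otherwise
 * they are normalised against the source pixmap extents.
 */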
static bool
gen4_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
//	tmp->src.offset[0] = -dst_x;
//	tmp->src.offset[1] = -dst_y;

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;
//	tmp->mask.offset[0] = -dst_x;
//	tmp->mask.offset[1] = -dst_y;

	tmp->u.gen4.wm_kernel = WM_KERNEL_MASK;
//	   gen4_choose_composite_kernel(tmp->op,
//					tmp->mask.bo != NULL,
//					tmp->has_component_alpha,
//					tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
	}

	gen4_bind_surfaces(sna, tmp);
	gen4_align_vertex(sna, tmp);
	return true;
}