WebSVN – Kolibri OS – Blame – /drivers/video/Intel-2D/gen5_render.c

Rev	Author	Line No.	Line
3280	Serge	1	/*
		2	* Copyright © 2006,2008,2011 Intel Corporation
		3	* Copyright © 2007 Red Hat, Inc.
		4	*
		5	* Permission is hereby granted, free of charge, to any person obtaining a
		6	* copy of this software and associated documentation files (the "Software"),
		7	* to deal in the Software without restriction, including without limitation
		8	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		9	* and/or sell copies of the Software, and to permit persons to whom the
		10	* Software is furnished to do so, subject to the following conditions:
		11	*
		12	* The above copyright notice and this permission notice (including the next
		13	* paragraph) shall be included in all copies or substantial portions of the
		14	* Software.
		15	*
		16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
		21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
		22	* SOFTWARE.
		23	*
		24	* Authors:
		25	* Wang Zhenyu
		26	* Eric Anholt
		27	* Carl Worth
		28	* Keith Packard
		29	* Chris Wilson
		30	*
		31	*/
		32
		33	#ifdef HAVE_CONFIG_H
		34	#include "config.h"
		35	#endif
		36
		37	#include "sna.h"
		38	#include "sna_reg.h"
		39	#include "sna_render.h"
		40	#include "sna_render_inline.h"
		41	//#include "sna_video.h"
		42
		43	#include "brw/brw.h"
		44	#include "gen5_render.h"
		45	#include "gen4_source.h"
		46	#include "gen4_vertex.h"
		47
		48	#define NO_COMPOSITE 0
		49	#define NO_COMPOSITE_SPANS 0
		50
		51	#define PREFER_BLT_FILL 1
		52
		53	#define DBG_NO_STATE_CACHE 0
		54	#define DBG_NO_SURFACE_CACHE 0
		55
		56	#define MAX_3D_SIZE 8192
		57
		58	#define GEN5_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
		59
		60	/* Set up a default static partitioning of the URB, which is supposed to
		61	* allow anything we would want to do, at potentially lower performance.
		62	*/
		63	#define URB_CS_ENTRY_SIZE 1
		64	#define URB_CS_ENTRIES 0
		65
		66	#define URB_VS_ENTRY_SIZE 1
		67	#define URB_VS_ENTRIES 256 /* minimum of 8 */
		68
		69	#define URB_GS_ENTRY_SIZE 0
		70	#define URB_GS_ENTRIES 0
		71
		72	#define URB_CLIP_ENTRY_SIZE 0
		73	#define URB_CLIP_ENTRIES 0
		74
		75	#define URB_SF_ENTRY_SIZE 2
		76	#define URB_SF_ENTRIES 64
		77
		78	/*
		79	* this program computes dA/dx and dA/dy for the texture coordinates along
		80	* with the base texture coordinate. It was extracted from the Mesa driver
		81	*/
		82
		83	#define SF_KERNEL_NUM_GRF 16
		84	#define SF_MAX_THREADS 48
		85
		86	#define PS_KERNEL_NUM_GRF 32
		87	#define PS_MAX_THREADS 72
		88
		89	static const uint32_t ps_kernel_packed_static[][4] = {
		90	#include "exa_wm_xy.g5b"
		91	#include "exa_wm_src_affine.g5b"
		92	#include "exa_wm_src_sample_argb.g5b"
		93	#include "exa_wm_yuv_rgb.g5b"
		94	#include "exa_wm_write.g5b"
		95	};
		96
		97	static const uint32_t ps_kernel_planar_static[][4] = {
		98	#include "exa_wm_xy.g5b"
		99	#include "exa_wm_src_affine.g5b"
		100	#include "exa_wm_src_sample_planar.g5b"
		101	#include "exa_wm_yuv_rgb.g5b"
		102	#include "exa_wm_write.g5b"
		103	};
		104
		105	#define NOKERNEL(kernel_enum, func, masked) \
		106	[kernel_enum] = {func, 0, masked}
		107	#define KERNEL(kernel_enum, kernel, masked) \
		108	[kernel_enum] = {&kernel, sizeof(kernel), masked}
		109	static const struct wm_kernel_info {
		110	const void *data;
		111	unsigned int size;
		112	bool has_mask;
		113	} wm_kernels[] = {
		114	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
		115	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),
		116
		117	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
		118	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),
		119
		120	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
		121	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),
		122
		123	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
		124	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),
		125
		126	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
		127	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),
		128
		129	KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
		130	KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
		131	};
		132	#undef KERNEL
		133
		134	static const struct blendinfo {
		135	bool src_alpha;
		136	uint32_t src_blend;
		137	uint32_t dst_blend;
		138	} gen5_blend_op[] = {
		139	/* Clear */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
		140	/* Src */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
		141	/* Dst */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
		142	/* Over */ {1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
		143	/* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
		144	/* In */ {0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
		145	/* InReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
		146	/* Out */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
		147	/* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
		148	/* Atop */ {1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
		149	/* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
		150	/* Xor */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
		151	/* Add */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
		152	};
		153
		154	/**
		155	* Highest-valued BLENDFACTOR used in gen5_blend_op.
		156	*
		157	* This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
		158	* GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
		159	* GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
		160	*/
		161	#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)
		162
		163	#define BLEND_OFFSET(s, d) \
		164	(((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)
		165
		166	#define SAMPLER_OFFSET(sf, se, mf, me, k) \
		167	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
		168
		169	static bool
		170	gen5_emit_pipelined_pointers(struct sna *sna,
		171	const struct sna_composite_op *op,
		172	int blend, int kernel);
		173
		174	#define OUT_BATCH(v) batch_emit(sna, v)
		175	#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
		176	#define OUT_VERTEX_F(v) vertex_emit(sna, v)
		177
		178	static inline bool too_large(int width, int height)
		179	{
		180	return width > MAX_3D_SIZE \|\| height > MAX_3D_SIZE;
		181	}
		182
		183	static int
		184	gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
		185	{
		186	int base;
		187
		188	if (has_mask) {
		189	if (is_ca) {
		190	if (gen5_blend_op[op].src_alpha)
		191	base = WM_KERNEL_MASKSA;
		192	else
		193	base = WM_KERNEL_MASKCA;
		194	} else
		195	base = WM_KERNEL_MASK;
		196	} else
		197	base = WM_KERNEL;
		198
		199	return base + !is_affine;
		200	}
		201
		202	static bool gen5_magic_ca_pass(struct sna *sna,
		203	const struct sna_composite_op *op)
		204	{
		205	struct gen5_render_state *state = &sna->render_state.gen5;
		206
		207	if (!op->need_magic_ca_pass)
		208	return false;
		209
		210	assert(sna->render.vertex_index > sna->render.vertex_start);
		211
		212	DBG(("%s: CA fixup\n", __FUNCTION__));
		213	assert(op->mask.bo != NULL);
		214	assert(op->has_component_alpha);
		215
		216	gen5_emit_pipelined_pointers
		217	(sna, op, PictOpAdd,
		218	gen5_choose_composite_kernel(PictOpAdd,
		219	true, true, op->is_affine));
		220
		221	OUT_BATCH(GEN5_3DPRIMITIVE \|
		222	GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL \|
		223	(_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) \|
		224	(0 << 9) \|
		225	4);
		226	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
		227	OUT_BATCH(sna->render.vertex_start);
		228	OUT_BATCH(1); /* single instance */
		229	OUT_BATCH(0); /* start instance location */
		230	OUT_BATCH(0); /* index buffer offset, ignored */
		231
		232	state->last_primitive = sna->kgem.nbatch;
		233	return true;
		234	}
		235
		236	static uint32_t gen5_get_blend(int op,
		237	bool has_component_alpha,
		238	uint32_t dst_format)
		239	{
		240	uint32_t src, dst;
		241
		242	src = GEN5_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
		243	dst = GEN5_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
		244	#if 0
		245	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
		246	* it as always 1.
		247	*/
		248	if (PICT_FORMAT_A(dst_format) == 0) {
		249	if (src == GEN5_BLENDFACTOR_DST_ALPHA)
		250	src = GEN5_BLENDFACTOR_ONE;
		251	else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
		252	src = GEN5_BLENDFACTOR_ZERO;
		253	}
		254
		255	/* If the source alpha is being used, then we should only be in a
		256	* case where the source blend factor is 0, and the source blend
		257	* value is the mask channels multiplied by the source picture's alpha.
		258	*/
		259	if (has_component_alpha && gen5_blend_op[op].src_alpha) {
		260	if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
		261	dst = GEN5_BLENDFACTOR_SRC_COLOR;
		262	else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
		263	dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
		264	}
		265	#endif
		266
		267	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
		268	op, dst_format, PICT_FORMAT_A(dst_format),
		269	src, dst, BLEND_OFFSET(src, dst)));
		270	return BLEND_OFFSET(src, dst);
		271	}
		272
		273	static uint32_t gen5_get_card_format(PictFormat format)
		274	{
		275	switch (format) {
		276	default:
		277	return -1;
		278	case PICT_a8r8g8b8:
		279	return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
		280	case PICT_x8r8g8b8:
		281	return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
		282	case PICT_a8:
		283	return GEN5_SURFACEFORMAT_A8_UNORM;
		284	}
		285	}
		286
		287	static uint32_t gen5_get_dest_format(PictFormat format)
		288	{
		289	switch (format) {
		290	default:
		291	return -1;
		292	case PICT_a8r8g8b8:
		293	case PICT_x8r8g8b8:
		294	return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
		295	case PICT_a8:
		296	return GEN5_SURFACEFORMAT_A8_UNORM;
		297	}
		298	}
		299	typedef struct gen5_surface_state_padded {
		300	struct gen5_surface_state state;
		301	char pad[32 - sizeof(struct gen5_surface_state)];
		302	} gen5_surface_state_padded;
		303
		304	static void null_create(struct sna_static_stream *stream)
		305	{
		306	/* A bunch of zeros useful for legacy border color and depth-stencil */
		307	sna_static_stream_map(stream, 64, 64);
		308	}
		309
		310	static void
		311	sampler_state_init(struct gen5_sampler_state *sampler_state,
		312	sampler_filter_t filter,
		313	sampler_extend_t extend)
		314	{
		315	sampler_state->ss0.lod_preclamp = 1; /* GL mode */
		316
		317	/* We use the legacy mode to get the semantics specified by
		318	* the Render extension. */
		319	sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;
		320
		321	switch (filter) {
		322	default:
		323	case SAMPLER_FILTER_NEAREST:
		324	sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
		325	sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
		326	break;
		327	case SAMPLER_FILTER_BILINEAR:
		328	sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
		329	sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
		330	break;
		331	}
		332
		333	switch (extend) {
		334	default:
		335	case SAMPLER_EXTEND_NONE:
		336	sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
		337	sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
		338	sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
		339	break;
		340	case SAMPLER_EXTEND_REPEAT:
		341	sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
		342	sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
		343	sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
		344	break;
		345	case SAMPLER_EXTEND_PAD:
		346	sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
		347	sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
		348	sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
		349	break;
		350	case SAMPLER_EXTEND_REFLECT:
		351	sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
		352	sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
		353	sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
		354	break;
		355	}
		356	}
		357
		358	static uint32_t
		359	gen5_tiling_bits(uint32_t tiling)
		360	{
		361	switch (tiling) {
		362	default: assert(0);
		363	case I915_TILING_NONE: return 0;
		364	case I915_TILING_X: return GEN5_SURFACE_TILED;
		365	case I915_TILING_Y: return GEN5_SURFACE_TILED \| GEN5_SURFACE_TILED_Y;
		366	}
		367	}
		368
		369	/**
		370	* Sets up the common fields for a surface state buffer for the given
		371	* picture in the given surface state buffer.
		372	*/
		373	static uint32_t
		374	gen5_bind_bo(struct sna *sna,
		375	struct kgem_bo *bo,
		376	uint32_t width,
		377	uint32_t height,
		378	uint32_t format,
		379	bool is_dst)
		380	{
		381	uint32_t domains;
		382	uint16_t offset;
		383	uint32_t *ss;
		384
		385	/* After the first bind, we manage the cache domains within the batch */
		386	if (!DBG_NO_SURFACE_CACHE) {
		387	offset = kgem_bo_get_binding(bo, format);
		388	if (offset) {
		389	if (is_dst)
		390	kgem_bo_mark_dirty(bo);
		391	return offset * sizeof(uint32_t);
		392	}
		393	}
		394
		395	offset = sna->kgem.surface -=
		396	sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
		397	ss = sna->kgem.batch + offset;
		398
		399	ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT \|
		400	GEN5_SURFACE_BLEND_ENABLED \|
		401	format << GEN5_SURFACE_FORMAT_SHIFT);
		402
		403	if (is_dst)
		404	domains = I915_GEM_DOMAIN_RENDER << 16 \| I915_GEM_DOMAIN_RENDER;
		405	else
		406	domains = I915_GEM_DOMAIN_SAMPLER << 16;
		407	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
		408
		409	ss[2] = ((width - 1) << GEN5_SURFACE_WIDTH_SHIFT \|
		410	(height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
		411	ss[3] = (gen5_tiling_bits(bo->tiling) \|
		412	(bo->pitch - 1) << GEN5_SURFACE_PITCH_SHIFT);
		413	ss[4] = 0;
		414	ss[5] = 0;
		415
		416	kgem_bo_set_binding(bo, format, offset);
		417
		418	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
		419	offset, bo->handle, ss[1],
		420	format, width, height, bo->pitch, bo->tiling,
		421	domains & 0xffff ? "render" : "sampler"));
		422
		423	return offset * sizeof(uint32_t);
		424	}
		425
		426	static void gen5_emit_vertex_buffer(struct sna *sna,
		427	const struct sna_composite_op *op)
		428	{
		429	int id = op->u.gen5.ve_id;
		430
		431	assert((sna->render.vb_id & (1 << id)) == 0);
		432
		433	OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS \| 3);
		434	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT \| VB0_VERTEXDATA \|
		435	(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
		436	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
		437	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
		438	OUT_BATCH(0);
		439	OUT_BATCH(~0); /* max address: disabled */
		440	OUT_BATCH(0);
		441
		442	sna->render.vb_id \|= 1 << id;
		443	}
		444
		445	static void gen5_emit_primitive(struct sna *sna)
		446	{
		447	if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
		448	sna->render.vertex_offset = sna->kgem.nbatch - 5;
		449	return;
		450	}
		451
		452	OUT_BATCH(GEN5_3DPRIMITIVE \|
		453	GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL \|
		454	(_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) \|
		455	(0 << 9) \|
		456	4);
		457	sna->render.vertex_offset = sna->kgem.nbatch;
		458	OUT_BATCH(0); /* vertex count, to be filled in later */
		459	OUT_BATCH(sna->render.vertex_index);
		460	OUT_BATCH(1); /* single instance */
		461	OUT_BATCH(0); /* start instance location */
		462	OUT_BATCH(0); /* index buffer offset, ignored */
		463	sna->render.vertex_start = sna->render.vertex_index;
		464
		465	sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
		466	}
		467
		468	static bool gen5_rectangle_begin(struct sna *sna,
		469	const struct sna_composite_op *op)
		470	{
		471	int id = op->u.gen5.ve_id;
		472	int ndwords;
		473
		474	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		475	return true;
		476
		477	ndwords = op->need_magic_ca_pass ? 20 : 6;
		478	if ((sna->render.vb_id & (1 << id)) == 0)
		479	ndwords += 5;
		480
		481	if (!kgem_check_batch(&sna->kgem, ndwords))
		482	return false;
		483
		484	if ((sna->render.vb_id & (1 << id)) == 0)
		485	gen5_emit_vertex_buffer(sna, op);
		486	if (sna->render.vertex_offset == 0)
		487	gen5_emit_primitive(sna);
		488
		489	return true;
		490	}
		491
		492	static int gen5_get_rectangles__flush(struct sna *sna,
		493	const struct sna_composite_op *op)
		494	{
		495	/* Preventing discarding new vbo after lock contention */
		496	if (sna_vertex_wait__locked(&sna->render)) {
		497	int rem = vertex_space(sna);
		498	if (rem > op->floats_per_rect)
		499	return rem;
		500	}
		501
		502	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
		503	return 0;
		504	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		505	return 0;
		506
		507	if (sna->render.vertex_offset) {
		508	gen4_vertex_flush(sna);
		509	if (gen5_magic_ca_pass(sna, op))
		510	gen5_emit_pipelined_pointers(sna, op, op->op,
		511	op->u.gen5.wm_kernel);
		512	}
		513
		514	return gen4_vertex_finish(sna);
		515	}
		516
		517	inline static int gen5_get_rectangles(struct sna *sna,
		518	const struct sna_composite_op *op,
		519	int want,
		520	void (emit_state)(struct sna sna,
		521	const struct sna_composite_op *op))
		522	{
		523	int rem;
		524
		525	assert(want);
		526
		527	start:
		528	rem = vertex_space(sna);
		529	if (unlikely(rem < op->floats_per_rect)) {
		530	DBG(("flushing vbo for %s: %d < %d\n",
		531	__FUNCTION__, rem, op->floats_per_rect));
		532	rem = gen5_get_rectangles__flush(sna, op);
		533	if (unlikely (rem == 0))
		534	goto flush;
		535	}
		536
		537	if (unlikely(sna->render.vertex_offset == 0)) {
		538	if (!gen5_rectangle_begin(sna, op))
		539	goto flush;
		540	else
		541	goto start;
		542	}
		543
		544	assert(op->floats_per_rect >= vertex_space(sna));
		545	assert(rem <= vertex_space(sna));
		546	if (want > 1 && want * op->floats_per_rect > rem)
		547	want = rem / op->floats_per_rect;
		548
		549	sna->render.vertex_index += 3*want;
		550	return want;
		551
		552	flush:
		553	if (sna->render.vertex_offset) {
		554	gen4_vertex_flush(sna);
		555	gen5_magic_ca_pass(sna, op);
		556	}
		557	sna_vertex_wait__locked(&sna->render);
		558	_kgem_submit(&sna->kgem);
		559	emit_state(sna, op);
		560	goto start;
		561	}
		562
		563	static uint32_t *
		564	gen5_composite_get_binding_table(struct sna *sna,
		565	uint16_t *offset)
		566	{
		567	sna->kgem.surface -=
		568	sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
		569
		570	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
		571
		572	/* Clear all surplus entries to zero in case of prefetch */
		573	*offset = sna->kgem.surface;
		574	return memset(sna->kgem.batch + sna->kgem.surface,
		575	0, sizeof(struct gen5_surface_state_padded));
		576	}
		577
		578	static void
		579	gen5_emit_urb(struct sna *sna)
		580	{
		581	int urb_vs_start, urb_vs_size;
		582	int urb_gs_start, urb_gs_size;
		583	int urb_clip_start, urb_clip_size;
		584	int urb_sf_start, urb_sf_size;
		585	int urb_cs_start, urb_cs_size;
		586
		587	urb_vs_start = 0;
		588	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
		589	urb_gs_start = urb_vs_start + urb_vs_size;
		590	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
		591	urb_clip_start = urb_gs_start + urb_gs_size;
		592	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
		593	urb_sf_start = urb_clip_start + urb_clip_size;
		594	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
		595	urb_cs_start = urb_sf_start + urb_sf_size;
		596	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
		597
		598	OUT_BATCH(GEN5_URB_FENCE \|
		599	UF0_CS_REALLOC \|
		600	UF0_SF_REALLOC \|
		601	UF0_CLIP_REALLOC \|
		602	UF0_GS_REALLOC \|
		603	UF0_VS_REALLOC \|
		604	1);
		605	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) \|
		606	((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) \|
		607	((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
		608	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) \|
		609	((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
		610
		611	/* Constant buffer state */
		612	OUT_BATCH(GEN5_CS_URB_STATE \| 0);
		613	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 \| URB_CS_ENTRIES << 0);
		614	}
		615
		616	static void
		617	gen5_emit_state_base_address(struct sna *sna)
		618	{
		619	assert(sna->render_state.gen5.general_bo->proxy == NULL);
		620	OUT_BATCH(GEN5_STATE_BASE_ADDRESS \| 6);
		621	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
		622	sna->kgem.nbatch,
		623	sna->render_state.gen5.general_bo,
		624	I915_GEM_DOMAIN_INSTRUCTION << 16,
		625	BASE_ADDRESS_MODIFY));
		626	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
		627	sna->kgem.nbatch,
		628	NULL,
		629	I915_GEM_DOMAIN_INSTRUCTION << 16,
		630	BASE_ADDRESS_MODIFY));
		631	OUT_BATCH(0); /* media */
		632	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
		633	sna->kgem.nbatch,
		634	sna->render_state.gen5.general_bo,
		635	I915_GEM_DOMAIN_INSTRUCTION << 16,
		636	BASE_ADDRESS_MODIFY));
		637
		638	/* upper bounds, all disabled */
		639	OUT_BATCH(BASE_ADDRESS_MODIFY);
		640	OUT_BATCH(0);
		641	OUT_BATCH(BASE_ADDRESS_MODIFY);
		642	}
		643
		644	static void
		645	gen5_emit_invariant(struct sna *sna)
		646	{
		647	/* Ironlake errata workaround: Before disabling the clipper,
		648	* you have to MI_FLUSH to get the pipeline idle.
		649	*
		650	* However, the kernel flushes the pipeline between batches,
		651	* so we should be safe....
		652	* OUT_BATCH(MI_FLUSH \| MI_INHIBIT_RENDER_CACHE_FLUSH);
		653	*/
		654	OUT_BATCH(GEN5_PIPELINE_SELECT \| PIPELINE_SELECT_3D);
		655
		656	gen5_emit_state_base_address(sna);
		657
		658	sna->render_state.gen5.needs_invariant = false;
		659	}
		660
		661	static void
		662	gen5_get_batch(struct sna sna, const struct sna_composite_op op)
		663	{
		664	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
		665
		666	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		667	DBG(("%s: flushing batch: %d < %d+%d\n",
		668	__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		669	150, 4*8));
		670	kgem_submit(&sna->kgem);
		671	_kgem_set_mode(&sna->kgem, KGEM_RENDER);
		672	}
		673
		674	if (sna->render_state.gen5.needs_invariant)
		675	gen5_emit_invariant(sna);
		676	}
		677
		678	static void
		679	gen5_align_vertex(struct sna sna, const struct sna_composite_op op)
		680	{
		681	assert(op->floats_per_rect == 3*op->floats_per_vertex);
		682	if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
		683	if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
		684	gen4_vertex_finish(sna);
		685
		686	DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		687	sna->render_state.gen5.floats_per_vertex,
		688	op->floats_per_vertex,
		689	sna->render.vertex_index,
		690	(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		691	sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		692	sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		693	sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
		694	}
		695	}
		696
		697	static void
		698	gen5_emit_binding_table(struct sna *sna, uint16_t offset)
		699	{
		700	if (!DBG_NO_STATE_CACHE &&
		701	sna->render_state.gen5.surface_table == offset)
		702	return;
		703
		704	sna->render_state.gen5.surface_table = offset;
		705
		706	/* Binding table pointers */
		707	OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS \| 4);
		708	OUT_BATCH(0); /* vs */
		709	OUT_BATCH(0); /* gs */
		710	OUT_BATCH(0); /* clip */
		711	OUT_BATCH(0); /* sf */
		712	/* Only the PS uses the binding table */
		713	OUT_BATCH(offset*4);
		714	}
		715
		716	static bool
		717	gen5_emit_pipelined_pointers(struct sna *sna,
		718	const struct sna_composite_op *op,
		719	int blend, int kernel)
		720	{
		721	uint16_t sp, bp;
		722	uint32_t key;
		723
		724	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
		725	__FUNCTION__, op->u.gen5.ve_id & 2,
		726	op->src.filter, op->src.repeat,
		727	op->mask.filter, op->mask.repeat,
		728	kernel, blend, op->has_component_alpha, (int)op->dst.format));
		729
		730	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
		731	op->mask.filter, op->mask.repeat,
		732	kernel);
		733	bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
		734
		735	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
		736	key = sp \| (uint32_t)bp << 16 \| (op->mask.bo != NULL) << 31;
		737	if (key == sna->render_state.gen5.last_pipelined_pointers)
		738	return false;
		739
		740
		741	OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS \| 5);
		742	OUT_BATCH(sna->render_state.gen5.vs);
		743	OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
		744	OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
		745	OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
		746	OUT_BATCH(sna->render_state.gen5.wm + sp);
		747	OUT_BATCH(sna->render_state.gen5.cc + bp);
		748
		749	sna->render_state.gen5.last_pipelined_pointers = key;
		750	return true;
		751	}
		752
		753	static void
		754	gen5_emit_drawing_rectangle(struct sna sna, const struct sna_composite_op op)
		755	{
		756	uint32_t limit = (op->dst.height - 1) << 16 \| (op->dst.width - 1);
		757	uint32_t offset = (uint16_t)op->dst.y << 16 \| (uint16_t)op->dst.x;
		758
		759	assert(!too_large(op->dst.x, op->dst.y));
		760	assert(!too_large(op->dst.width, op->dst.height));
		761
		762	if (!DBG_NO_STATE_CACHE &&
		763	sna->render_state.gen5.drawrect_limit == limit &&
		764	sna->render_state.gen5.drawrect_offset == offset)
		765	return;
		766
		767	sna->render_state.gen5.drawrect_offset = offset;
		768	sna->render_state.gen5.drawrect_limit = limit;
		769
		770	OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE \| (4 - 2));
		771	OUT_BATCH(0x00000000);
		772	OUT_BATCH(limit);
		773	OUT_BATCH(offset);
		774	}
		775
		776	static void
		777	gen5_emit_vertex_elements(struct sna *sna,
		778	const struct sna_composite_op *op)
		779	{
		780	/*
		781	* vertex data in vertex buffer
		782	* position: (x, y)
		783	* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
		784	* texture coordinate 1 if (has_mask is true): same as above
		785	*/
		786	struct gen5_render_state *render = &sna->render_state.gen5;
		787	int id = op->u.gen5.ve_id;
		788	bool has_mask = id >> 2;
		789	uint32_t format, dw;
		790
		791	if (!DBG_NO_STATE_CACHE && render->ve_id == id)
		792	return;
		793
		794	DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
		795	render->ve_id = id;
		796
		797	/* The VUE layout
		798	* dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
		799	* dword 4-7: position (x, y, 1.0, 1.0),
		800	* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
		801	* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
		802	*
		803	* dword 4-15 are fetched from vertex buffer
		804	*/
		805	OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS \|
		806	((2 * (has_mask ? 4 : 3)) + 1 - 2));
		807
		808	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) \| VE0_VALID \|
		809	(GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) \|
		810	(0 << VE0_OFFSET_SHIFT));
		811	OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) \|
		812	(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) \|
		813	(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) \|
		814	(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
		815
		816	/* x,y */
		817	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT \| VE0_VALID \|
		818	GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT \|
		819
		820	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT \|
		821	VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT \|
		822	VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT \|
		823	VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
		824
		825	/* u0, v0, w0 */
		826	DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
		827	id, id & 3));
		828	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		829	switch (id & 3) {
		830	default:
		831	assert(0);
		832	case 0:
		833	format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
		834	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		835	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		836	dw \|= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		837	break;
		838	case 1:
		839	format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
		840	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		841	dw \|= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		842	dw \|= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		843	break;
		844	case 2:
		845	format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
		846	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		847	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		848	dw \|= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		849	break;
		850	case 3:
		851	format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
		852	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		853	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		854	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		855	break;
		856	}
		857	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT \| VE0_VALID \|
		858	format \| 4 << VE0_OFFSET_SHIFT);
		859	OUT_BATCH(dw);
		860
		861	/* u1, v1, w1 */
		862	if (has_mask) {
		863	unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		864	DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
		865	id, id >> 2, offset));
		866	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		867	switch (id >> 2) {
		868	case 1:
		869	format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
		870	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		871	dw \|= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		872	dw \|= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		873	break;
		874	default:
		875	assert(0);
		876	case 2:
		877	format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
		878	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		879	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		880	dw \|= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		881	break;
		882	case 3:
		883	format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
		884	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		885	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		886	dw \|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		887	break;
		888	}
		889	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT \| VE0_VALID \|
		890	format \| offset << VE0_OFFSET_SHIFT);
		891	OUT_BATCH(dw);
		892	}
		893	}
		894
		895	static void
		896	gen5_emit_state(struct sna *sna,
		897	const struct sna_composite_op *op,
		898	uint16_t offset)
		899	{
		900	if (kgem_bo_is_dirty(op->src.bo) \|\| kgem_bo_is_dirty(op->mask.bo)) {
		901	DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
		902	kgem_bo_is_dirty(op->src.bo),
		903	kgem_bo_is_dirty(op->mask.bo)));
		904	OUT_BATCH(MI_FLUSH);
		905	kgem_clear_dirty(&sna->kgem);
		906	kgem_bo_mark_dirty(op->dst.bo);
		907	}
		908
		909	/* drawrect must be first for Ironlake BLT workaround */
		910	gen5_emit_drawing_rectangle(sna, op);
		911	gen5_emit_binding_table(sna, offset);
		912	if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
		913	gen5_emit_urb(sna);
		914	gen5_emit_vertex_elements(sna, op);
		915	}
		916
		917	static void gen5_bind_surfaces(struct sna *sna,
		918	const struct sna_composite_op *op)
		919	{
		920	uint32_t *binding_table;
		921	uint16_t offset;
		922
		923	gen5_get_batch(sna, op);
		924
		925	binding_table = gen5_composite_get_binding_table(sna, &offset);
		926
		927	binding_table[0] =
		928	gen5_bind_bo(sna,
		929	op->dst.bo, op->dst.width, op->dst.height,
		930	gen5_get_dest_format(op->dst.format),
		931	true);
		932	binding_table[1] =
		933	gen5_bind_bo(sna,
		934	op->src.bo, op->src.width, op->src.height,
		935	op->src.card_format,
		936	false);
		937	if (op->mask.bo) {
		938	assert(op->u.gen5.ve_id >> 2);
		939	binding_table[2] =
		940	gen5_bind_bo(sna,
		941	op->mask.bo,
		942	op->mask.width,
		943	op->mask.height,
		944	op->mask.card_format,
		945	false);
		946	}
		947
		948	if (sna->kgem.surface == offset &&
		949	(uint64_t )(sna->kgem.batch + sna->render_state.gen5.surface_table) == (uint64_t)binding_table &&
		950	(op->mask.bo == NULL \|\|
		951	sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
		952	sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
		953	offset = sna->render_state.gen5.surface_table;
		954	}
		955
		956	gen5_emit_state(sna, op, offset);
		957	}
		958
		959	fastcall static void
		960	gen5_render_composite_blt(struct sna *sna,
		961	const struct sna_composite_op *op,
		962	const struct sna_composite_rectangles *r)
		963	{
		964	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
		965	__FUNCTION__,
		966	r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
		967	r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
		968	r->dst.x, r->dst.y, op->dst.x, op->dst.y,
		969	r->width, r->height));
		970
		971	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
		972	op->prim_emit(sna, op, r);
		973	}
		974
		975
		976	static void
		977	gen5_render_composite_done(struct sna *sna,
		978	const struct sna_composite_op *op)
		979	{
		980	if (sna->render.vertex_offset) {
		981	gen4_vertex_flush(sna);
		982	gen5_magic_ca_pass(sna,op);
		983	}
		984
		985	DBG(("%s()\n", __FUNCTION__));
		986
		987	}
		988
		989
		990	static bool
		991	gen5_blit_tex(struct sna *sna,
		992	uint8_t op,
		993	PixmapPtr src, struct kgem_bo *src_bo,
		994	PixmapPtr mask,struct kgem_bo *mask_bo,
		995	PixmapPtr dst, struct kgem_bo *dst_bo,
		996	int32_t src_x, int32_t src_y,
		997	int32_t msk_x, int32_t msk_y,
		998	int32_t dst_x, int32_t dst_y,
		999	int32_t width, int32_t height,
		1000	struct sna_composite_op *tmp)
		1001	{
		1002	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
		1003	width, height, sna->kgem.mode));
		1004
		1005	tmp->op = PictOpSrc;
		1006
		1007	tmp->dst.pixmap = dst;
		1008	tmp->dst.bo = dst_bo;
		1009	tmp->dst.width = dst->drawable.width;
		1010	tmp->dst.height = dst->drawable.height;
		1011	tmp->dst.format = PICT_x8r8g8b8;
		1012
		1013
		1014	tmp->src.repeat = RepeatNone;
		1015	tmp->src.filter = PictFilterNearest;
		1016	tmp->src.is_affine = true;
		1017
		1018	tmp->src.bo = src_bo;
		1019	tmp->src.pict_format = PICT_x8r8g8b8;
		1020	tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
		1021	tmp->src.width = src->drawable.width;
		1022	tmp->src.height = src->drawable.height;
		1023
		1024
		1025	tmp->is_affine = tmp->src.is_affine;
		1026	tmp->has_component_alpha = false;
		1027	tmp->need_magic_ca_pass = false;
		1028
		1029	tmp->mask.is_affine = true;
		1030	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
		1031	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
		1032	tmp->mask.bo = mask_bo;
		1033	tmp->mask.pict_format = PIXMAN_a8;
		1034	tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
		1035	tmp->mask.width = mask->drawable.width;
		1036	tmp->mask.height = mask->drawable.height;
		1037
		1038	tmp->src.scale[0] = 1.f/width; //src->width;
		1039	tmp->src.scale[1] = 1.f/height; //src->height;
		1040
		1041	tmp->mask.scale[0] = 1.f/mask->drawable.width;
		1042	tmp->mask.scale[1] = 1.f/mask->drawable.height;
		1043
		1044
		1045	tmp->u.gen5.wm_kernel =
		1046	gen5_choose_composite_kernel(tmp->op,
		1047	tmp->mask.bo != NULL,
		1048	tmp->has_component_alpha,
		1049	tmp->is_affine);
		1050	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
		1051
		1052	tmp->blt = gen5_render_composite_blt;
		1053	// tmp->box = gen5_render_composite_box;
		1054	tmp->done = gen5_render_composite_done;
		1055
		1056	if (!kgem_check_bo(&sna->kgem,
		1057	tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
		1058	kgem_submit(&sna->kgem);
		1059	}
		1060
		1061	gen5_bind_surfaces(sna, tmp);
		1062	gen5_align_vertex(sna, tmp);
		1063	return true;
		1064
		1065	return false;
		1066	}
		1067
		1068
		1069
		1070	static void
		1071	gen5_render_flush(struct sna *sna)
		1072	{
		1073	gen4_vertex_close(sna);
		1074
		1075	assert(sna->render.vb_id == 0);
		1076	assert(sna->render.vertex_offset == 0);
		1077	}
		1078
		1079	static void
		1080	gen5_render_context_switch(struct kgem *kgem,
		1081	int new_mode)
		1082	{
		1083	if (!kgem->nbatch)
		1084	return;
		1085
		1086	/* WaNonPipelinedStateCommandFlush
		1087	*
		1088	* Ironlake has a limitation that a 3D or Media command can't
		1089	* be the first command after a BLT, unless it's
		1090	* non-pipelined.
		1091	*
		1092	* We do this by ensuring that the non-pipelined drawrect
		1093	* is always emitted first following a switch from BLT.
		1094	*/
		1095	if (kgem->mode == KGEM_BLT) {
		1096	struct sna *sna = to_sna_from_kgem(kgem);
		1097	DBG(("%s: forcing drawrect on next state emission\n",
		1098	__FUNCTION__));
		1099	sna->render_state.gen5.drawrect_limit = -1;
		1100	}
		1101
		1102	if (kgem_ring_is_idle(kgem, kgem->ring)) {
		1103	DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
		1104	_kgem_submit(kgem);
		1105	}
		1106	}
		1107
		1108	static void
		1109	discard_vbo(struct sna *sna)
		1110	{
		1111	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
		1112	sna->render.vbo = NULL;
		1113	sna->render.vertices = sna->render.vertex_data;
		1114	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		1115	sna->render.vertex_used = 0;
		1116	sna->render.vertex_index = 0;
		1117	}
		1118
		1119	static void
		1120	gen5_render_retire(struct kgem *kgem)
		1121	{
		1122	struct sna *sna;
		1123
		1124	sna = container_of(kgem, struct sna, kgem);
		1125	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		1126	DBG(("%s: resetting idle vbo\n", __FUNCTION__));
		1127	sna->render.vertex_used = 0;
		1128	sna->render.vertex_index = 0;
		1129	}
		1130	}
		1131
		1132	static void
		1133	gen5_render_expire(struct kgem *kgem)
		1134	{
		1135	struct sna *sna;
		1136
		1137	sna = container_of(kgem, struct sna, kgem);
		1138	if (sna->render.vbo && !sna->render.vertex_used) {
		1139	DBG(("%s: discarding vbo\n", __FUNCTION__));
		1140	discard_vbo(sna);
		1141	}
		1142	}
		1143
		1144	static void gen5_render_reset(struct sna *sna)
		1145	{
		1146	sna->render_state.gen5.needs_invariant = true;
		1147	sna->render_state.gen5.ve_id = -1;
		1148	sna->render_state.gen5.last_primitive = -1;
		1149	sna->render_state.gen5.last_pipelined_pointers = 0;
		1150
		1151	sna->render_state.gen5.drawrect_offset = -1;
		1152	sna->render_state.gen5.drawrect_limit = -1;
		1153	sna->render_state.gen5.surface_table = -1;
		1154
		1155	if (sna->render.vbo &&
		1156	!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
		1157	DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		1158	discard_vbo(sna);
		1159	}
		1160
		1161	sna->render.vertex_offset = 0;
		1162	sna->render.nvertex_reloc = 0;
		1163	sna->render.vb_id = 0;
		1164	}
		1165
		1166	static void gen5_render_fini(struct sna *sna)
		1167	{
		1168	kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
		1169	}
		1170
		1171	static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
		1172	{
		1173	struct gen5_vs_unit_state vs = sna_static_stream_map(stream, sizeof(vs), 32);
		1174
		1175	/* Set up the vertex shader to be disabled (passthrough) */
		1176	vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
		1177	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
		1178	vs->vs6.vs_enable = 0;
		1179	vs->vs6.vert_cache_disable = 1;
		1180
		1181	return sna_static_stream_offsetof(stream, vs);
		1182	}
		1183
		1184	static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
		1185	uint32_t kernel)
		1186	{
		1187	struct gen5_sf_unit_state *sf_state;
		1188
		1189	sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
		1190
		1191	sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
		1192	sf_state->thread0.kernel_start_pointer = kernel >> 6;
		1193
		1194	sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
		1195	sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
		1196	sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
		1197	/* don't smash vertex header, read start from dw8 */
		1198	sf_state->thread3.urb_entry_read_offset = 1;
		1199	sf_state->thread3.dispatch_grf_start_reg = 3;
		1200	sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
		1201	sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
		1202	sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
		1203	sf_state->sf5.viewport_transform = false; /* skip viewport */
		1204	sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
		1205	sf_state->sf6.scissor = 0;
		1206	sf_state->sf7.trifan_pv = 2;
		1207	sf_state->sf6.dest_org_vbias = 0x8;
		1208	sf_state->sf6.dest_org_hbias = 0x8;
		1209
		1210	return sna_static_stream_offsetof(stream, sf_state);
		1211	}
		1212
		1213	static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
		1214	sampler_filter_t src_filter,
		1215	sampler_extend_t src_extend,
		1216	sampler_filter_t mask_filter,
		1217	sampler_extend_t mask_extend)
		1218	{
		1219	struct gen5_sampler_state *sampler_state;
		1220
		1221	sampler_state = sna_static_stream_map(stream,
		1222	sizeof(struct gen5_sampler_state) * 2,
		1223	32);
		1224	sampler_state_init(&sampler_state[0], src_filter, src_extend);
		1225	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
		1226
		1227	return sna_static_stream_offsetof(stream, sampler_state);
		1228	}
		1229
		1230	static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
		1231	bool has_mask,
		1232	uint32_t kernel,
		1233	uint32_t sampler)
		1234	{
		1235	state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
		1236	state->thread0.kernel_start_pointer = kernel >> 6;
		1237
		1238	state->thread1.single_program_flow = 0;
		1239
		1240	/* scratch space is not used in our kernel */
		1241	state->thread2.scratch_space_base_pointer = 0;
		1242	state->thread2.per_thread_scratch_space = 0;
		1243
		1244	state->thread3.const_urb_entry_read_length = 0;
		1245	state->thread3.const_urb_entry_read_offset = 0;
		1246
		1247	state->thread3.urb_entry_read_offset = 0;
		1248	/* wm kernel use urb from 3, see wm_program in compiler module */
		1249	state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
		1250
		1251	state->wm4.sampler_count = 0; /* hardware requirement */
		1252
		1253	state->wm4.sampler_state_pointer = sampler >> 5;
		1254	state->wm5.max_threads = PS_MAX_THREADS - 1;
		1255	state->wm5.transposed_urb_read = 0;
		1256	state->wm5.thread_dispatch_enable = 1;
		1257	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
		1258	* start point
		1259	*/
		1260	state->wm5.enable_16_pix = 1;
		1261	state->wm5.enable_8_pix = 0;
		1262	state->wm5.early_depth_test = 1;
		1263
		1264	/* Each pair of attributes (src/mask coords) is two URB entries */
		1265	if (has_mask) {
		1266	state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
		1267	state->thread3.urb_entry_read_length = 4;
		1268	} else {
		1269	state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
		1270	state->thread3.urb_entry_read_length = 2;
		1271	}
		1272
		1273	/* binding table entry count is only used for prefetching,
		1274	* and it has to be set 0 for Ironlake
		1275	*/
		1276	state->thread1.binding_table_entry_count = 0;
		1277	}
		1278
		1279	static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
		1280	{
		1281	uint8_t ptr, base;
		1282	int i, j;
		1283
		1284	base = ptr =
		1285	sna_static_stream_map(stream,
		1286	GEN5_BLENDFACTOR_COUNTGEN5_BLENDFACTOR_COUNT64,
		1287	64);
		1288
		1289	for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
		1290	for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
		1291	struct gen5_cc_unit_state *state =
		1292	(struct gen5_cc_unit_state *)ptr;
		1293
		1294	state->cc3.blend_enable =
		1295	!(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);
		1296
		1297	state->cc5.logicop_func = 0xc; /* COPY */
		1298	state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
		1299
		1300	/* Fill in alpha blend factors same as color, for the future. */
		1301	state->cc5.ia_src_blend_factor = i;
		1302	state->cc5.ia_dest_blend_factor = j;
		1303
		1304	state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
		1305	state->cc6.clamp_post_alpha_blend = 1;
		1306	state->cc6.clamp_pre_alpha_blend = 1;
		1307	state->cc6.src_blend_factor = i;
		1308	state->cc6.dest_blend_factor = j;
		1309
		1310	ptr += 64;
		1311	}
		1312	}
		1313
		1314	return sna_static_stream_offsetof(stream, base);
		1315	}
		1316
		1317	static bool gen5_render_setup(struct sna *sna)
		1318	{
		1319	struct gen5_render_state *state = &sna->render_state.gen5;
		1320	struct sna_static_stream general;
		1321	struct gen5_wm_unit_state_padded *wm_state;
		1322	uint32_t sf[2], wm[KERNEL_COUNT];
		1323	int i, j, k, l, m;
		1324
		1325	sna_static_stream_init(&general);
		1326
		1327	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
		1328	* dumps, you know it points to zero.
		1329	*/
		1330	null_create(&general);
		1331
		1332	/* Set up the two SF states (one for blending with a mask, one without) */
		1333	sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
		1334	sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
		1335
		1336	for (m = 0; m < KERNEL_COUNT; m++) {
		1337	if (wm_kernels[m].size) {
		1338	wm[m] = sna_static_stream_add(&general,
		1339	wm_kernels[m].data,
		1340	wm_kernels[m].size,
		1341	64);
		1342	} else {
		1343	wm[m] = sna_static_stream_compile_wm(sna, &general,
		1344	wm_kernels[m].data,
		1345	16);
		1346	}
		1347	assert(wm[m]);
		1348	}
		1349
		1350	state->vs = gen5_create_vs_unit_state(&general);
		1351
		1352	state->sf[0] = gen5_create_sf_state(&general, sf[0]);
		1353	state->sf[1] = gen5_create_sf_state(&general, sf[1]);
		1354
		1355
		1356	/* Set up the WM states: each filter/extend type for source and mask, per
		1357	* kernel.
		1358	*/
		1359	wm_state = sna_static_stream_map(&general,
		1360	sizeof(wm_state) KERNEL_COUNT *
		1361	FILTER_COUNT * EXTEND_COUNT *
		1362	FILTER_COUNT * EXTEND_COUNT,
		1363	64);
		1364	state->wm = sna_static_stream_offsetof(&general, wm_state);
		1365	for (i = 0; i < FILTER_COUNT; i++) {
		1366	for (j = 0; j < EXTEND_COUNT; j++) {
		1367	for (k = 0; k < FILTER_COUNT; k++) {
		1368	for (l = 0; l < EXTEND_COUNT; l++) {
		1369	uint32_t sampler_state;
		1370
		1371	sampler_state =
		1372	gen5_create_sampler_state(&general,
		1373	i, j,
		1374	k, l);
		1375
		1376	for (m = 0; m < KERNEL_COUNT; m++) {
		1377	gen5_init_wm_state(&wm_state->state,
		1378	wm_kernels[m].has_mask,
		1379	wm[m], sampler_state);
		1380	wm_state++;
		1381	}
		1382	}
		1383	}
		1384	}
		1385	}
		1386
		1387	state->cc = gen5_create_cc_unit_state(&general);
		1388
		1389	state->general_bo = sna_static_stream_fini(sna, &general);
		1390	return state->general_bo != NULL;
		1391	}
		1392
		1393	bool gen5_render_init(struct sna *sna)
		1394	{
		1395	if (!gen5_render_setup(sna))
		1396	return false;
		1397
		1398	sna->kgem.context_switch = gen5_render_context_switch;
		1399	sna->kgem.retire = gen5_render_retire;
		1400	sna->kgem.expire = gen5_render_expire;
		1401
		1402	sna->render.blit_tex = gen5_blit_tex;
		1403
		1404	sna->render.flush = gen5_render_flush;
		1405	sna->render.reset = gen5_render_reset;
		1406	sna->render.fini = gen5_render_fini;
		1407
		1408	sna->render.max_3d_size = MAX_3D_SIZE;
		1409	sna->render.max_3d_pitch = 1 << 18;
		1410	return true;
		1411	}

Subversion Repositories Kolibri OS

(root)/drivers/video/Intel-2D/gen5_render.c @ 4251 – Rev 3280