/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu
 *    Eric Anholt
 *    Carl Worth
 *    Keith Packard
 *    Chris Wilson
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders that we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
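/* The hardware field counts 16-register GRF blocks, minus one: e.g.
 * GEN4_GRF_BLOCKS(16) == 0 and GEN4_GRF_BLOCKS(32) == 1.
 */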
 
/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES        0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES        32

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES        0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES        64
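/* With these values gen4_emit_urb() computes the fences as
 * VS: rows [0,32), GS and CLIP: empty at row 32, SF: rows [32,160)
 * (64 entries * 2 rows each), CS: empty at row 160.
 */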
 
/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
    [kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
    const void *data;
    unsigned int size;
    bool has_mask;
} wm_kernels[] = {
    NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
    NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

    NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
    NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

    NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
    NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

    NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
    NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

    NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
    NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

    KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
    KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL
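/* NOKERNEL entries carry size == 0, so gen4_render_setup() assembles them at
 * runtime from the brw_wm_kernel__* sources; KERNEL entries are pre-built
 * .g4b blobs (the video kernels above) copied into the stream verbatim.
 */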
 
static const struct blendinfo {
    bool src_alpha;
    uint32_t src_blend;
    uint32_t dst_blend;
} gen4_blend_op[] = {
    /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
    /* Src */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
    /* Dst */   {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
    /* Over */  {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
    /* In */    {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Out */   {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Atop */  {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Xor */   {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Add */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
    (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
    ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
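/* Both macros yield byte offsets into the static state built by
 * gen4_render_setup(): every cc unit state is written at a 64-byte stride
 * (see gen4_create_cc_unit_state), and the wm unit states are padded to
 * 64 bytes and laid out as a [sf][se][mf][me][kernel] array in that order.
 */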
 
static void
gen4_emit_pipelined_pointers(struct sna *sna,
                 const struct sna_composite_op *op,
                 int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
    return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}
 
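/* Map a Render op onto one of the wm kernels above; e.g. PictOpOver with a
 * component-alpha mask selects WM_KERNEL_MASKSA, since Over's blend consumes
 * the source alpha (see gen4_blend_op), and "+ !is_affine" moves to the
 * projective variant.
 */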
static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
    int base;

    if (has_mask) {
        if (is_ca) {
            if (gen4_blend_op[op].src_alpha)
                base = WM_KERNEL_MASKSA;
            else
                base = WM_KERNEL_MASKCA;
        } else
            base = WM_KERNEL_MASK;
    } else
        base = WM_KERNEL;

    return base + !is_affine;
}
 
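/* Component alpha with an op whose blend consumes the source alpha cannot be
 * done in a single gen4 blend pass. Once the first pass has applied the
 * per-channel mask to the destination, this fixup replays exactly the same
 * vertices (vertex_start..vertex_index) with a PictOpAdd blend and the CA
 * kernel to accumulate the masked source values.
 */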
static bool gen4_magic_ca_pass(struct sna *sna,
                   const struct sna_composite_op *op)
{
    struct gen4_render_state *state = &sna->render_state.gen4;

    if (!op->need_magic_ca_pass)
        return false;

    assert(sna->render.vertex_index > sna->render.vertex_start);

    DBG(("%s: CA fixup\n", __FUNCTION__));
    assert(op->mask.bo != NULL);
    assert(op->has_component_alpha);

    gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
                     gen4_choose_composite_kernel(PictOpAdd,
                                  true, true, op->is_affine));

    OUT_BATCH(GEN4_3DPRIMITIVE |
          GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
          (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
          (0 << 9) |
          4);
    OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
    OUT_BATCH(sna->render.vertex_start);
    OUT_BATCH(1);   /* single instance */
    OUT_BATCH(0);   /* start instance location */
    OUT_BATCH(0);   /* index buffer offset, ignored */

    state->last_primitive = sna->kgem.nbatch;
    return true;
}
 
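/* Note: this port hardwires ONE/INV_SRC_ALPHA, i.e. premultiplied src-over,
 * for every op; the stock logic deriving the factors from gen4_blend_op[]
 * and the destination format is compiled out below.
 */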
static uint32_t gen4_get_blend(int op,
                   bool has_component_alpha,
                   uint32_t dst_format)
{
    uint32_t src, dst;

    src = GEN4_BLENDFACTOR_ONE;  //gen4_blend_op[op].src_blend;
    dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
#if 0
    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
     * it as always 1.
     */
    if (PICT_FORMAT_A(dst_format) == 0) {
        if (src == GEN4_BLENDFACTOR_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ONE;
        else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ZERO;
    }

    /* If the source alpha is being used, then we should only be in a
     * case where the source blend factor is 0, and the source blend
     * value is the mask channels multiplied by the source picture's alpha.
     */
    if (has_component_alpha && gen4_blend_op[op].src_alpha) {
        if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_SRC_COLOR;
        else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
    }
#endif
    DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
         op, dst_format, PICT_FORMAT_A(dst_format),
         src, dst, BLEND_OFFSET(src, dst)));
    return BLEND_OFFSET(src, dst);
}
 
static uint32_t gen4_get_card_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
    }
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
    }
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;
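/* SURFACE_STATE proper is six dwords; padding each entry to 32 bytes keeps
 * the hardware-required alignment and makes the surface bookkeeping in
 * gen4_bind_bo()/gen4_composite_get_binding_table() a fixed stride.
 */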
 
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}
 
/**
 * Sets up the common fields of a surface state entry for the given bo,
 * writing it into the batch's surface area.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst)
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
 
static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
 
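/* Open a new RECTLIST primitive, or, when nothing has been emitted since the
 * previous 3DPRIMITIVE, rewind vertex_offset to its vertex-count dword
 * (nbatch - 5) so the following rectangles extend the still-open primitive.
 */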
static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
 
static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7xpipelined pointers + 6xprimitive + 1xflush */
	ndwords = op->need_magic_ca_pass? 20 : 6;
	if ((sna->render.vb_id & (1 << id)) == 0)
		ndwords += 5;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & (1 << id)) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (op->need_magic_ca_pass && sna->render.vbo)
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}
 
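/* Reserve vbo space for up to 'want' rectangles, flushing and resubmitting
 * as required; returns how many fit (each RECTLIST rectangle is three
 * vertices, hence vertex_index advances by 3 per rectangle).
 */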
inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
 
static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_start, urb_vs_size;
	int urb_gs_start, urb_gs_size;
	int urb_clip_start, urb_clip_size;
	int urb_sf_start, urb_sf_size;
	int urb_cs_start, urb_cs_size;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_start = 0;
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_start = urb_vs_start + urb_vs_size;
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_clip_start = urb_gs_start + urb_gs_size;
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
	urb_sf_start = urb_clip_start + urb_clip_size;
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_start = urb_sf_start + urb_sf_size;
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
 
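	/* The URB_FENCE packet must not cross a 64-byte cacheline boundary,
	 * so pad with NOOPs until its three dwords fit within the current
	 * cacheline.
	 */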
	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}
 
static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}
 
static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}
 
static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}
 
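/* The sampler/wm offset and blend offset are folded into a single key and
 * cached in last_pipelined_pointers so redundant 3DSTATE_PIPELINED_POINTERS
 * packets are skipped; emitting one is also the point at which the one-shot
 * URB fence is written.
 */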
static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}
 
static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
 
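/* ve_id encodes the vertex layout: bits 1:0 give the source texcoord storage
 * (0: two sscaled shorts, 1: one float, 2: two floats, 3: three floats) and
 * bits 2+ the mask texcoord float count, zero meaning no mask channel.
 */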
static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}
 
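/* Bit 0 of wm_binding_table smuggles the "destination was already dirty"
 * flag from gen4_bind_surfaces() (offset | dirty); it is masked off before
 * the real binding-table offset is emitted.
 */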
static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	flush = wm_binding_table & 1;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		OUT_BATCH(MI_FLUSH);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	gen4_emit_binding_table(sna, wm_binding_table & ~1);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}
 
static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen4_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	gen4_emit_state(sna, op, offset | dirty);
}
 
fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}
 
static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
}
 
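/* Entry point used by the KolibriOS blitter: composites src (x8r8g8b8)
 * through an a8 mask with PictOpSrc; 'scale' chooses whether the source
 * texcoords are normalised to the blit size or to the source size.
 */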
static bool
gen4_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
//	tmp->src.offset[0] = -dst_x;
//	tmp->src.offset[1] = -dst_y;

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;
//	tmp->mask.offset[0] = -dst_x;
//	tmp->mask.offset[1] = -dst_y;

	tmp->u.gen4.wm_kernel =
		gen4_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
	}

	gen4_bind_surfaces(sna, tmp);
	gen4_align_vertex(sna, tmp);
	return true;
}
 
static void
gen4_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
discard_vbo(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}

static void
gen4_render_retire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}

static void
gen4_render_expire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}
}

static void gen4_render_reset(struct sna *sna)
{
	sna->render_state.gen4.needs_invariant = true;
	sna->render_state.gen4.needs_urb = true;
	sna->render_state.gen4.ve_id = -1;
	sna->render_state.gen4.last_primitive = -1;
	sna->render_state.gen4.last_pipelined_pointers = -1;

	sna->render_state.gen4.drawrect_offset = -1;
	sna->render_state.gen4.drawrect_limit = -1;
	sna->render_state.gen4.surface_table = -1;

	if (sna->render.vbo &&
	    !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}
 
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return sna_static_stream_offsetof(stream, vs);
}
 
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
				     int gen, uint32_t kernel)
{
	struct gen4_sf_unit_state *sf;

	sf = sna_static_stream_map(stream, sizeof(*sf), 32);

	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer = kernel >> 6;
	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;
	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->sf5.viewport_transform = false;	/* skip viewport */
	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf7.trifan_pv = 2;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return sna_static_stream_offsetof(stream, sf);
}
 
static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *sampler_state;

	sampler_state = sna_static_stream_map(stream,
					      sizeof(struct gen4_sampler_state) * 2,
					      32);
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, sampler_state);
}
 
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;

	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;

	wm->thread3.urb_entry_read_offset = 0;
	wm->thread3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	if (has_mask) {
		wm->thread1.binding_table_entry_count = 3;
		wm->thread3.urb_entry_read_length = 4;
	} else {
		wm->thread1.binding_table_entry_count = 2;
		wm->thread3.urb_entry_read_length = 2;
	}
}
 
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *ptr, *base;
	int i, j;

	base = ptr =
		sna_static_stream_map(stream,
				      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				      64);

	for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
			struct gen4_cc_unit_state *state =
				(struct gen4_cc_unit_state *)ptr;

			state->cc3.blend_enable =
				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc;	/* COPY */
			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;

			/* Fill in alpha blend factors same as color, for the future. */
			state->cc5.ia_src_blend_factor = i;
			state->cc5.ia_dest_blend_factor = j;

			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = i;
			state->cc6.dest_blend_factor = j;

			ptr += 64;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
 
static bool gen4_render_setup(struct sna *sna)
{
	struct gen4_render_state *state = &sna->render_state.gen4;
	struct sna_static_stream general;
	struct gen4_wm_unit_state_padded *wm_state;
	uint32_t sf, wm[KERNEL_COUNT];
	int i, j, k, l, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		}
	}

	state->vs = gen4_create_vs_unit_state(&general);
	state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);

	wm_state = sna_static_stream_map(&general,
					  sizeof(*wm_state) * KERNEL_COUNT *
					  FILTER_COUNT * EXTEND_COUNT *
					  FILTER_COUNT * EXTEND_COUNT,
					  64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					uint32_t sampler_state;

					sampler_state =
						gen4_create_sampler_state(&general,
									  i, j,
									  k, l);

					for (m = 0; m < KERNEL_COUNT; m++) {
						gen4_init_wm_state(&wm_state->state,
								   sna->kgem.gen,
								   wm_kernels[m].has_mask,
								   wm[m], sampler_state);
						wm_state++;
					}
				}
			}
		}
	}

	state->cc = gen4_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
 
bool gen4_render_init(struct sna *sna)
{
	if (!gen4_render_setup(sna))
		return false;

	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

	sna->render.prefer_gpu |= PREFER_GPU_RENDER;

	sna->render.blit_tex = gen4_blit_tex;

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	return true;
}