/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu
 *    Eric Anholt
 *    Carl Worth
 *    Keith Packard
 *    Chris Wilson
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_common.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders that we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define MAX_FLUSH_VERTICES 6
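/* Despite the name, this is counted in rectangles (three vertices each):
 * with FORCE_FLUSH enabled, gen4_get_rectangles() inserts an MI_FLUSH at
 * least every MAX_FLUSH_VERTICES rectangles.
 */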

#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES        0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES        32

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES        0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES        64

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
    [kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
    const void *data;
    unsigned int size;
    bool has_mask;
} wm_kernels[] = {
    NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
    NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

    NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
    NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

    NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
    NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

    NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
    NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

    NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
    NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

    KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
    KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL

static const struct blendinfo {
    bool src_alpha;
    uint32_t src_blend;
    uint32_t dst_blend;
} gen4_blend_op[] = {
    /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
    /* Src */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
    /* Dst */   {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
    /* Over */  {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
    /* In */    {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Out */   {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Atop */  {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Xor */   {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Add */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
    (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
    ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
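/* Blend (CC) and sampler state for every combination is pre-baked into the
 * static state stream when the renderer is initialised (not shown in this
 * file excerpt); these macros compute the byte offset of one combination
 * within those tables, each entry occupying a 64-byte slot.
 */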

static void
gen4_emit_pipelined_pointers(struct sna *sna,
                 const struct sna_composite_op *op,
                 int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
    return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}
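
/* Pick the WM kernel for a composite op: masked kernels come in
 * component-alpha (CA) and source-alpha (SA) flavours, and every kernel
 * has an affine and a projective variant in adjacent enum slots, hence
 * the "base + !is_affine" at the end.
 */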

static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
    int base;

    if (has_mask) {
        if (is_ca) {
            if (gen4_blend_op[op].src_alpha)
                base = WM_KERNEL_MASKSA;
            else
                base = WM_KERNEL_MASKCA;
        } else
            base = WM_KERNEL_MASK;
    } else
        base = WM_KERNEL;

    return base + !is_affine;
}
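
/* Component-alpha fixup: replay exactly the rectangles of the primary pass
 * (vertex_start..vertex_index are still in the vbo) with a PictOpAdd
 * kernel, so the source contribution is accumulated on top of the first
 * pass without re-emitting any vertex data.
 */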

static bool gen4_magic_ca_pass(struct sna *sna,
                   const struct sna_composite_op *op)
{
    struct gen4_render_state *state = &sna->render_state.gen4;

    if (!op->need_magic_ca_pass)
        return false;

    assert(sna->render.vertex_index > sna->render.vertex_start);

    DBG(("%s: CA fixup\n", __FUNCTION__));
    assert(op->mask.bo != NULL);
    assert(op->has_component_alpha);

    gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
                     gen4_choose_composite_kernel(PictOpAdd,
                                  true, true, op->is_affine));

    OUT_BATCH(GEN4_3DPRIMITIVE |
          GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
          (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
          (0 << 9) |
          4);
    OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
    OUT_BATCH(sna->render.vertex_start);
    OUT_BATCH(1);   /* single instance */
    OUT_BATCH(0);   /* start instance location */
    OUT_BATCH(0);   /* index buffer offset, ignored */

    state->last_primitive = sna->kgem.nbatch;
    return true;
}

static uint32_t gen4_get_blend(int op,
                   bool has_component_alpha,
                   uint32_t dst_format)
{
    uint32_t src, dst;

    src = GEN4_BLENDFACTOR_ONE;  //gen4_blend_op[op].src_blend;
    dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
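    /* NB: this port hard-codes the PictOpOver blend factors
     * (ONE, INV_SRC_ALPHA); the generic per-op adjustment below
     * is compiled out.
     */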
#if 0
    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
     * it as always 1.
     */
    if (PICT_FORMAT_A(dst_format) == 0) {
        if (src == GEN4_BLENDFACTOR_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ONE;
        else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ZERO;
    }

    /* If the source alpha is being used, then we should only be in a
     * case where the source blend factor is 0, and the source blend
     * value is the mask channels multiplied by the source picture's alpha.
     */
    if (has_component_alpha && gen4_blend_op[op].src_alpha) {
        if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_SRC_COLOR;
        else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
    }
#endif
    DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
         op, dst_format, PICT_FORMAT_A(dst_format),
         src, dst, BLEND_OFFSET(src, dst)));
    return BLEND_OFFSET(src, dst);
}

static uint32_t gen4_get_card_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
    }
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
    }
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
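
/* If the previous command in the batch was also our 3DPRIMITIVE, extend it
 * in place instead of emitting a new one: point vertex_offset back at its
 * (still unfilled) vertex-count dword.
 */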

static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}

static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	unsigned int id = 1 << op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7xpipelined pointers + 6xprimitive + 1xflush */
	ndwords = op->need_magic_ca_pass? 20 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	ndwords += 2*FORCE_FLUSH;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem,
			      2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);
#if FORCE_FLUSH
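	/* Workaround for the gen4 shader flush bug (see FORCE_FLUSH above):
	 * compute how many rectangles may still be emitted into the current
	 * primitive before an MI_FLUSH is due, and clamp this request so we
	 * never exceed MAX_FLUSH_VERTICES rectangles between flushes.
	 */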
	rem = sna->render.vertex_offset;
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
		rem = sna->kgem.nbatch - 5;
	if (rem) {
		rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
		if (rem <= 0) {
			if (sna->render.vertex_offset) {
				gen4_vertex_flush(sna);
				if (gen4_magic_ca_pass(sna, op))
					gen4_emit_pipelined_pointers(sna, op, op->op,
								     op->u.gen4.wm_kernel);
			}
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
			rem = MAX_FLUSH_VERTICES;
		}
	} else
		rem = MAX_FLUSH_VERTICES;
	if (want > rem)
		want = rem;
#endif

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}
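
/* Fence off the URB according to the static partitioning declared at the
 * top of the file.  The URB_FENCE command must not cross a 64-byte
 * cacheline boundary, hence the MI_NOOP padding until the command fits
 * within the current cacheline.
 */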

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_start, urb_vs_size;
	int urb_gs_start, urb_gs_size;
	int urb_clip_start, urb_clip_size;
	int urb_sf_start, urb_sf_size;
	int urb_cs_start, urb_cs_size;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_start = 0;
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_start = urb_vs_start + urb_vs_size;
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_clip_start = urb_gs_start + urb_gs_size;
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
	urb_sf_start = urb_clip_start + urb_clip_size;
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_start = urb_sf_start + urb_sf_size;
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}

static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}

static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}

static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}
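
/* The sampler and blend offsets uniquely identify the fixed-function
 * setup; pack them into a single key so that an unchanged combination
 * skips re-emitting 3DSTATE_PIPELINED_POINTERS altogether.
 */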

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}

static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}

static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	assert(op->dst.bo->exec);

	flush = wm_binding_table & 1;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		OUT_BATCH(MI_FLUSH);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	gen4_emit_binding_table(sna, wm_binding_table & ~1);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}
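
/* Emit surface state for dst/src/mask and point the binding table at it.
 * If the freshly written table is identical to the previous one, roll the
 * surface allocation back and reuse the old binding-table offset.
 */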

static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen4_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	gen4_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}

#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen4_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen4_emit_state(sna, op, offset | dirty);
}

static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	BoxPtr box;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel =
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;
	}

	gen4_align_vertex(sna, &tmp);
	gen4_video_bind_surfaces(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
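
	/* Texture coordinates are normalised over the whole frame, so the
	 * scale maps destination pixels to source texels and the offset
	 * accounts for the source origin within the frame.
	 */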

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			BoxRec r;

			r.x1 = box->x1 + pix_xoff;
			r.x2 = box->x2 + pix_xoff;
			r.y1 = box->y1 + pix_yoff;
			r.y2 = box->y2 + pix_yoff;

			OUT_VERTEX(r.x2, r.y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
				sna_damage_add_box(&priv->gpu_damage, &r);
				sna_damage_subtract_box(&priv->cpu_damage, &r);
			}
			box++;
		} while (--n);
	} while (nbox);
	gen4_vertex_flush(sna);

	return true;
}
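
/* Convert a Picture into a composite channel: solid colours and linear
 * gradients are synthesised directly, integer-translation transforms are
 * folded into the coordinates, and anything the sampler cannot address
 * directly is fixed up, converted or extracted into a smaller bo.
 */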

static int
gen4_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_repeat(picture)) {
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_filter(picture)) {
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen4_get_card_format(picture->format);
	if (channel->card_format == -1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
	     __FUNCTION__,
	     channel->repeat, gen4_repeat(channel->repeat),
	     channel->filter, gen4_repeat(channel->filter)));
	channel->repeat = gen4_repeat(channel->repeat);
	channel->filter = gen4_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen4_get_card_format(channel->pict_format);
}
#endif

static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}

}

#if 0
static bool
gen4_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	op->dst.format = dst->format;
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}

static bool
check_gradient(PicturePtr picture, bool precise)
{
	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

static bool
need_upload(struct sna *sna, PicturePtr p)
{
	return p->pDrawable && untransformed(p) &&
		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}

static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
		return true;

	/* soft errors: prefer to upload/compute rather than readback */
	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}

static bool
gen4_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen4_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
1674
	if (src_fallback) {
1675
		DBG(("%s: dst is on the CPU and src will fallback\n",
1676
		     __FUNCTION__));
1677
		return true;
1678
	}
1679
 
1680
	if (mask_fallback) {
1681
		DBG(("%s: dst is on the CPU and mask will fallback\n",
1682
		     __FUNCTION__));
1683
		return true;
1684
	}
1685
 
1686
	if (too_large(dst_pixmap->drawable.width,
1687
		      dst_pixmap->drawable.height) &&
1688
	    dst_is_cpu(dst_pixmap)) {
1689
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1690
		return true;
1691
	}
1692
 
1693
	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1694
	     __FUNCTION__));
1695
	return dst_use_cpu(dst_pixmap);
1696
}
1697
 
1698
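/* If the mask samples the same drawable with the same transform and alpha
 * map as the source, clone the already-prepared source channel (taking a
 * fresh bo reference) instead of setting the mask channel up from scratch. */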
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen4_check_repeat(mask))
		return false;

	if (!gen4_check_filter(mask))
		return false;

	if (!gen4_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen4_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen4_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

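/* Top-level composite entry point: try the BLT engine first for unmasked
 * operations, bail to software when gen4_composite_fallback() says so,
 * tile oversized operations, and otherwise prepare the source and mask
 * channels before choosing a WM kernel and vertex emitter. */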
static bool
gen4_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	if (op >= ARRAY_SIZE(gen4_blend_op))
		return false;

	if (mask == NULL &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen4_composite_fallback(sna, src, mask, dst))
		return false;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (!gen4_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       op > PictOpSrc || dst->pCompositeClip->data)) {
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
		return false;
	}

	tmp->op = op;
	switch (gen4_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: failed to prepare source\n", __FUNCTION__));
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		if (mask == NULL &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen4_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen4_blend_op[op].src_alpha &&
			    (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s -- fallback: unhandled component alpha blend\n",
					     __FUNCTION__));

					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen4_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: failed to prepare mask\n", __FUNCTION__));
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen4_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

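	/* Pick the WM kernel variant matching this operation (masked or
	 * not, component alpha or not, affine or projective), then the
	 * vertex emitter that produces the corresponding vertex layout. */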
	tmp->u.gen4.wm_kernel =
		gen4_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->box   = gen4_render_composite_box;
	tmp->boxes = gen4_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
		tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
	}
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				     tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				     NULL))
			goto cleanup_mask;
	}

	gen4_align_vertex(sna, tmp);
	gen4_bind_surfaces(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}

#endif

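/* Invalidate all cached hardware state after a batch submission: the -1
 * sentinels (and the needs_* flags) force the unit states, drawing
 * rectangle, surface table and vertex-buffer binding to be re-emitted
 * into the next batch. */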
static void gen4_render_reset(struct sna *sna)
{
	sna->render_state.gen4.needs_invariant = true;
	sna->render_state.gen4.needs_urb = true;
	sna->render_state.gen4.ve_id = -1;
	sna->render_state.gen4.last_primitive = -1;
	sna->render_state.gen4.last_pipelined_pointers = -1;

	sna->render_state.gen4.drawrect_offset = -1;
	sna->render_state.gen4.drawrect_limit = -1;
	sna->render_state.gen4.surface_table = -1;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}

static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return sna_static_stream_offsetof(stream, vs);
}

static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
				     uint32_t kernel)
{
	struct gen4_sf_unit_state *sf;

	sf = sna_static_stream_map(stream, sizeof(*sf), 32);

	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer = kernel >> 6;
	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;
	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->sf5.viewport_transform = false;	/* skip viewport */
	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf7.trifan_pv = 2;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return sna_static_stream_offsetof(stream, sf);
}

static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *sampler_state;

	sampler_state = sna_static_stream_map(stream,
					      sizeof(struct gen4_sampler_state) * 2,
					      32);
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, sampler_state);
}

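/* Each sampler block emitted above holds a pair of states: slot 0 samples
 * the source channel and slot 1 the mask channel. */
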
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;

	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;

	wm->thread3.urb_entry_read_offset = 0;
	wm->thread3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	if (has_mask) {
		wm->thread1.binding_table_entry_count = 3;
		wm->thread3.urb_entry_read_length = 4;
	} else {
		wm->thread1.binding_table_entry_count = 2;
		wm->thread3.urb_entry_read_length = 2;
	}
}

static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *ptr, *base;
	int i, j;

	base = ptr =
		sna_static_stream_map(stream,
				      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				      64);

	for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
			struct gen4_cc_unit_state *state =
				(struct gen4_cc_unit_state *)ptr;

			state->cc3.blend_enable =
				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc;	/* COPY */
			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;

			/* Fill in alpha blend factors same as color, for the future. */
			state->cc5.ia_src_blend_factor = i;
			state->cc5.ia_dest_blend_factor = j;

			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = i;
			state->cc6.dest_blend_factor = j;

			ptr += 64;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

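/* The loop above packs one 64-byte CC state per (src, dst) blend-factor
 * pair, source factor major.  A lookup for a given pair would therefore
 * take the form (a sketch; the actual consumer lives elsewhere, with
 * cc_base being the offset returned by gen4_create_cc_unit_state()):
 *
 *	offset = cc_base + (src_factor * GEN4_BLENDFACTOR_COUNT +
 *			    dst_factor) * 64;
 */
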
static bool gen4_render_setup(struct sna *sna)
{
	struct gen4_render_state *state = &sna->render_state.gen4;
	struct sna_static_stream general;
	struct gen4_wm_unit_state_padded *wm_state;
	uint32_t sf, wm[KERNEL_COUNT];
	int i, j, k, l, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		}
	}

	state->vs = gen4_create_vs_unit_state(&general);
	state->sf = gen4_create_sf_state(&general, sf);

	wm_state = sna_static_stream_map(&general,
					  sizeof(*wm_state) * KERNEL_COUNT *
					  FILTER_COUNT * EXTEND_COUNT *
					  FILTER_COUNT * EXTEND_COUNT,
					  64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					uint32_t sampler_state;

					sampler_state =
						gen4_create_sampler_state(&general,
									  i, j,
									  k, l);

					for (m = 0; m < KERNEL_COUNT; m++) {
						gen4_init_wm_state(&wm_state->state,
								   sna->kgem.gen,
								   wm_kernels[m].has_mask,
								   wm[m], sampler_state);
						wm_state++;
					}
				}
			}
		}
	}

	state->cc = gen4_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

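/* The WM unit-state table built by gen4_render_setup() uses the kernel as
 * the minor dimension.  A lookup would take the form (a sketch, assuming
 * the same loop order as above; the real consumer lives elsewhere):
 *
 *	n = (((src_filter * EXTEND_COUNT + src_extend) * FILTER_COUNT
 *	      + mask_filter) * EXTEND_COUNT + mask_extend) * KERNEL_COUNT
 *	    + kernel;
 *	offset = state->wm + n * sizeof(struct gen4_wm_unit_state_padded);
 */
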
const char *gen4_render_init(struct sna *sna, const char *backend)
{
	if (!gen4_render_setup(sna))
		return backend;

	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if 0
#if !NO_COMPOSITE
	sna->render.composite = gen4_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen4_check_composite_spans;
	sna->render.composite_spans = gen4_render_composite_spans;
	if (0)
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif

#if !NO_VIDEO
	sna->render.video = gen4_render_video;
#endif

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen4_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen4_render_fill_one;
#endif

#endif

	sna->render.blit_tex = gen4_blit_tex;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}

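/* Textured-blit entry point used in place of the full composite paths
 * (which are compiled out above): always a PictOpSrc copy of an x8r8g8b8
 * source modulated by an a8 mask onto an a8r8g8b8 destination, sampled
 * unfiltered with no repeat. */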
static bool
gen4_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_a8r8g8b8;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Normalise the source texture coordinates, either to the blit
	 * size or to the full source extents. */
	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
//	tmp->src.offset[0] = -dst_x;
//	tmp->src.offset[1] = -dst_y;

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;
//	tmp->mask.offset[0] = -dst_x;
//	tmp->mask.offset[1] = -dst_y;

	tmp->u.gen4.wm_kernel = WM_KERNEL_MASK;
//	gen4_choose_composite_kernel(tmp->op,
//				     tmp->mask.bo != NULL,
//				     tmp->has_component_alpha,
//				     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
	}

	gen4_align_vertex(sna, tmp);
	gen4_bind_surfaces(sna, tmp);
	return true;
}