Subversion Repositories Kolibri OS

Rev

Rev 4359 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4359 Rev 4501
1
/*
1
/*
2
 * Copyright © 2006,2008,2011 Intel Corporation
2
 * Copyright © 2006,2008,2011 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
11
 *
12
 * The above copyright notice and this permission notice (including the next
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
14
 * Software.
15
 *
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
22
 * SOFTWARE.
23
 *
23
 *
24
 * Authors:
24
 * Authors:
25
 *    Wang Zhenyu 
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
26
 *    Eric Anholt 
27
 *    Carl Worth 
27
 *    Carl Worth 
28
 *    Keith Packard 
28
 *    Keith Packard 
29
 *    Chris Wilson 
29
 *    Chris Wilson 
30
 *
30
 *
31
 */
31
 */
32
 
32
 
33
#ifdef HAVE_CONFIG_H
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
34
#include "config.h"
35
#endif
35
#endif
36
 
36
 
37
#include "sna.h"
37
#include "sna.h"
38
#include "sna_reg.h"
38
#include "sna_reg.h"
39
#include "sna_render.h"
39
#include "sna_render.h"
40
#include "sna_render_inline.h"
40
#include "sna_render_inline.h"
41
//#include "sna_video.h"
41
//#include "sna_video.h"
42
 
42
 
43
#include "brw/brw.h"
43
#include "brw/brw.h"
44
#include "gen5_render.h"
44
#include "gen5_render.h"
-
 
45
#include "gen4_common.h"
45
#include "gen4_source.h"
46
#include "gen4_source.h"
46
#include "gen4_vertex.h"
47
#include "gen4_vertex.h"
47
 
48
 
48
#define NO_COMPOSITE 0
49
#define NO_COMPOSITE 0
49
#define NO_COMPOSITE_SPANS 0
50
#define NO_COMPOSITE_SPANS 0
50
 
51
 
51
#define PREFER_BLT_FILL 1
52
#define PREFER_BLT_FILL 1
52
 
53
 
53
#define DBG_NO_STATE_CACHE 0
54
#define DBG_NO_STATE_CACHE 0
54
#define DBG_NO_SURFACE_CACHE 0
55
#define DBG_NO_SURFACE_CACHE 0
55
 
56
 
56
#define MAX_3D_SIZE 8192
57
#define MAX_3D_SIZE 8192
57
 
58
 
58
#define GEN5_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
59
#define GEN5_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
59
 
60
 
60
/* Set up a default static partitioning of the URB, which is supposed to
61
/* Set up a default static partitioning of the URB, which is supposed to
61
 * allow anything we would want to do, at potentially lower performance.
62
 * allow anything we would want to do, at potentially lower performance.
62
 */
63
 */
63
#define URB_CS_ENTRY_SIZE     1
64
#define URB_CS_ENTRY_SIZE     1
64
#define URB_CS_ENTRIES	      0
65
#define URB_CS_ENTRIES	      0
65
 
66
 
66
#define URB_VS_ENTRY_SIZE     1
67
#define URB_VS_ENTRY_SIZE     1
67
#define URB_VS_ENTRIES	      256 /* minimum of 8 */
68
#define URB_VS_ENTRIES	      256 /* minimum of 8 */
68
 
69
 
69
#define URB_GS_ENTRY_SIZE     0
70
#define URB_GS_ENTRY_SIZE     0
70
#define URB_GS_ENTRIES	      0
71
#define URB_GS_ENTRIES	      0
71
 
72
 
72
#define URB_CLIP_ENTRY_SIZE   0
73
#define URB_CLIP_ENTRY_SIZE   0
73
#define URB_CLIP_ENTRIES      0
74
#define URB_CLIP_ENTRIES      0
74
 
75
 
75
#define URB_SF_ENTRY_SIZE     2
76
#define URB_SF_ENTRY_SIZE     2
76
#define URB_SF_ENTRIES	      64
77
#define URB_SF_ENTRIES	      64
77
 
78
 
78
/*
79
/*
79
 * this program computes dA/dx and dA/dy for the texture coordinates along
80
 * this program computes dA/dx and dA/dy for the texture coordinates along
80
 * with the base texture coordinate. It was extracted from the Mesa driver
81
 * with the base texture coordinate. It was extracted from the Mesa driver
81
 */
82
 */
82
 
83
 
83
#define SF_KERNEL_NUM_GRF  16
84
#define SF_KERNEL_NUM_GRF  16
84
#define SF_MAX_THREADS	   48
85
#define SF_MAX_THREADS	   48
85
 
86
 
86
#define PS_KERNEL_NUM_GRF   32
87
#define PS_KERNEL_NUM_GRF   32
87
#define PS_MAX_THREADS	    72
88
#define PS_MAX_THREADS	    72
88
 
89
 
89
static const uint32_t ps_kernel_packed_static[][4] = {
90
static const uint32_t ps_kernel_packed_static[][4] = {
90
#include "exa_wm_xy.g5b"
91
#include "exa_wm_xy.g5b"
91
#include "exa_wm_src_affine.g5b"
92
#include "exa_wm_src_affine.g5b"
92
#include "exa_wm_src_sample_argb.g5b"
93
#include "exa_wm_src_sample_argb.g5b"
93
#include "exa_wm_yuv_rgb.g5b"
94
#include "exa_wm_yuv_rgb.g5b"
94
#include "exa_wm_write.g5b"
95
#include "exa_wm_write.g5b"
95
};
96
};
96
 
97
 
97
static const uint32_t ps_kernel_planar_static[][4] = {
98
static const uint32_t ps_kernel_planar_static[][4] = {
98
#include "exa_wm_xy.g5b"
99
#include "exa_wm_xy.g5b"
99
#include "exa_wm_src_affine.g5b"
100
#include "exa_wm_src_affine.g5b"
100
#include "exa_wm_src_sample_planar.g5b"
101
#include "exa_wm_src_sample_planar.g5b"
101
#include "exa_wm_yuv_rgb.g5b"
102
#include "exa_wm_yuv_rgb.g5b"
102
#include "exa_wm_write.g5b"
103
#include "exa_wm_write.g5b"
103
};
104
};
104
 
105
 
105
#define NOKERNEL(kernel_enum, func, masked) \
106
#define NOKERNEL(kernel_enum, func, masked) \
106
    [kernel_enum] = {func, 0, masked}
107
    [kernel_enum] = {func, 0, masked}
107
#define KERNEL(kernel_enum, kernel, masked) \
108
#define KERNEL(kernel_enum, kernel, masked) \
108
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
109
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
109
static const struct wm_kernel_info {
110
static const struct wm_kernel_info {
110
	const void *data;
111
	const void *data;
111
	unsigned int size;
112
	unsigned int size;
112
	bool has_mask;
113
	bool has_mask;
113
} wm_kernels[] = {
114
} wm_kernels[] = {
114
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
115
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
115
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),
116
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),
116
 
117
 
117
	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
118
	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
118
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),
119
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),
119
 
120
 
120
	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
121
	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
121
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),
122
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),
122
 
123
 
123
	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
124
	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
124
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),
125
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),
125
 
126
 
126
	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
127
	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
127
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),
128
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),
128
 
129
 
129
	KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
130
	KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
130
	KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
131
	KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
131
};
132
};
132
#undef KERNEL
133
#undef KERNEL
133
 
134
 
134
static const struct blendinfo {
135
static const struct blendinfo {
135
	bool src_alpha;
136
	bool src_alpha;
136
	uint32_t src_blend;
137
	uint32_t src_blend;
137
	uint32_t dst_blend;
138
	uint32_t dst_blend;
138
} gen5_blend_op[] = {
139
} gen5_blend_op[] = {
139
	/* Clear */	{0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
140
	/* Clear */	{0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
140
	/* Src */	{0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
141
	/* Src */	{0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
141
	/* Dst */	{0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
142
	/* Dst */	{0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
142
	/* Over */	{1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
143
	/* Over */	{1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
143
	/* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
144
	/* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
144
	/* In */	{0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
145
	/* In */	{0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
145
	/* InReverse */	{1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
146
	/* InReverse */	{1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
146
	/* Out */	{0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
147
	/* Out */	{0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
147
	/* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
148
	/* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
148
	/* Atop */	{1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
149
	/* Atop */	{1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
149
	/* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
150
	/* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
150
	/* Xor */	{1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
151
	/* Xor */	{1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
151
	/* Add */	{0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
152
	/* Add */	{0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
152
};
153
};
153
 
154
 
154
/**
155
/**
155
 * Highest-valued BLENDFACTOR used in gen5_blend_op.
156
 * Highest-valued BLENDFACTOR used in gen5_blend_op.
156
 *
157
 *
157
 * This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
158
 * This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
158
 * GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
159
 * GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
159
 * GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
160
 * GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
160
 */
161
 */
161
#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)
162
#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)
162
 
163
 
163
#define BLEND_OFFSET(s, d) \
164
#define BLEND_OFFSET(s, d) \
164
	(((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)
165
	(((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)
165
 
166
 
166
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
167
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
167
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
168
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
168
 
169
 
169
static bool
170
static bool
170
gen5_emit_pipelined_pointers(struct sna *sna,
171
gen5_emit_pipelined_pointers(struct sna *sna,
171
			     const struct sna_composite_op *op,
172
			     const struct sna_composite_op *op,
172
			     int blend, int kernel);
173
			     int blend, int kernel);
173
 
174
 
174
#define OUT_BATCH(v) batch_emit(sna, v)
175
#define OUT_BATCH(v) batch_emit(sna, v)
175
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
176
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
176
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
177
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
177
 
178
 
178
static inline bool too_large(int width, int height)
179
static inline bool too_large(int width, int height)
179
{
180
{
180
	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
181
	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
181
}
182
}
182
 
183
 
183
static int
184
static int
184
gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
185
gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
185
{
186
{
186
	int base;
187
	int base;
187
 
188
 
188
	if (has_mask) {
189
	if (has_mask) {
189
		if (is_ca) {
190
		if (is_ca) {
190
			if (gen5_blend_op[op].src_alpha)
191
			if (gen5_blend_op[op].src_alpha)
191
				base = WM_KERNEL_MASKSA;
192
				base = WM_KERNEL_MASKSA;
192
			else
193
			else
193
				base = WM_KERNEL_MASKCA;
194
				base = WM_KERNEL_MASKCA;
194
		} else
195
		} else
195
			base = WM_KERNEL_MASK;
196
			base = WM_KERNEL_MASK;
196
	} else
197
	} else
197
		base = WM_KERNEL;
198
		base = WM_KERNEL;
198
 
199
 
199
	return base + !is_affine;
200
	return base + !is_affine;
200
}
201
}
201
 
202
 
202
static bool gen5_magic_ca_pass(struct sna *sna,
203
static bool gen5_magic_ca_pass(struct sna *sna,
203
			       const struct sna_composite_op *op)
204
			       const struct sna_composite_op *op)
204
{
205
{
205
	struct gen5_render_state *state = &sna->render_state.gen5;
206
	struct gen5_render_state *state = &sna->render_state.gen5;
206
 
207
 
207
	if (!op->need_magic_ca_pass)
208
	if (!op->need_magic_ca_pass)
208
		return false;
209
		return false;
209
 
210
 
210
	assert(sna->render.vertex_index > sna->render.vertex_start);
211
	assert(sna->render.vertex_index > sna->render.vertex_start);
211
 
212
 
212
	DBG(("%s: CA fixup\n", __FUNCTION__));
213
	DBG(("%s: CA fixup\n", __FUNCTION__));
213
	assert(op->mask.bo != NULL);
214
	assert(op->mask.bo != NULL);
214
	assert(op->has_component_alpha);
215
	assert(op->has_component_alpha);
215
 
216
 
216
	gen5_emit_pipelined_pointers
217
	gen5_emit_pipelined_pointers
217
		(sna, op, PictOpAdd,
218
		(sna, op, PictOpAdd,
218
		 gen5_choose_composite_kernel(PictOpAdd,
219
		 gen5_choose_composite_kernel(PictOpAdd,
219
					      true, true, op->is_affine));
220
					      true, true, op->is_affine));
220
 
221
 
221
	OUT_BATCH(GEN5_3DPRIMITIVE |
222
	OUT_BATCH(GEN5_3DPRIMITIVE |
222
		  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
223
		  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
223
		  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
224
		  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
224
		  (0 << 9) |
225
		  (0 << 9) |
225
		  4);
226
		  4);
226
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
227
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
227
	OUT_BATCH(sna->render.vertex_start);
228
	OUT_BATCH(sna->render.vertex_start);
228
	OUT_BATCH(1);	/* single instance */
229
	OUT_BATCH(1);	/* single instance */
229
	OUT_BATCH(0);	/* start instance location */
230
	OUT_BATCH(0);	/* start instance location */
230
	OUT_BATCH(0);	/* index buffer offset, ignored */
231
	OUT_BATCH(0);	/* index buffer offset, ignored */
231
 
232
 
232
	state->last_primitive = sna->kgem.nbatch;
233
	state->last_primitive = sna->kgem.nbatch;
233
	return true;
234
	return true;
234
}
235
}
235
 
236
 
236
static uint32_t gen5_get_blend(int op,
237
static uint32_t gen5_get_blend(int op,
237
			       bool has_component_alpha,
238
			       bool has_component_alpha,
238
			       uint32_t dst_format)
239
			       uint32_t dst_format)
239
{
240
{
240
	uint32_t src, dst;
241
	uint32_t src, dst;
241
 
242
 
242
    src = GEN5_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
243
    src = GEN5_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
243
    dst = GEN5_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
244
    dst = GEN5_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
244
#if 0
245
#if 0
245
	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
246
	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
246
	 * it as always 1.
247
	 * it as always 1.
247
	 */
248
	 */
248
	if (PICT_FORMAT_A(dst_format) == 0) {
249
	if (PICT_FORMAT_A(dst_format) == 0) {
249
		if (src == GEN5_BLENDFACTOR_DST_ALPHA)
250
		if (src == GEN5_BLENDFACTOR_DST_ALPHA)
250
			src = GEN5_BLENDFACTOR_ONE;
251
			src = GEN5_BLENDFACTOR_ONE;
251
		else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
252
		else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
252
			src = GEN5_BLENDFACTOR_ZERO;
253
			src = GEN5_BLENDFACTOR_ZERO;
253
	}
254
	}
254
 
255
 
255
	/* If the source alpha is being used, then we should only be in a
256
	/* If the source alpha is being used, then we should only be in a
256
	 * case where the source blend factor is 0, and the source blend
257
	 * case where the source blend factor is 0, and the source blend
257
	 * value is the mask channels multiplied by the source picture's alpha.
258
	 * value is the mask channels multiplied by the source picture's alpha.
258
	 */
259
	 */
259
	if (has_component_alpha && gen5_blend_op[op].src_alpha) {
260
	if (has_component_alpha && gen5_blend_op[op].src_alpha) {
260
		if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
261
		if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
261
			dst = GEN5_BLENDFACTOR_SRC_COLOR;
262
			dst = GEN5_BLENDFACTOR_SRC_COLOR;
262
		else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
263
		else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
263
			dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
264
			dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
264
	}
265
	}
265
#endif
266
#endif
266
 
267
 
267
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
268
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
268
	     op, dst_format, PICT_FORMAT_A(dst_format),
269
	     op, dst_format, PICT_FORMAT_A(dst_format),
269
	     src, dst, BLEND_OFFSET(src, dst)));
270
	     src, dst, BLEND_OFFSET(src, dst)));
270
	return BLEND_OFFSET(src, dst);
271
	return BLEND_OFFSET(src, dst);
271
}
272
}
272
 
273
 
273
static uint32_t gen5_get_card_format(PictFormat format)
274
static uint32_t gen5_get_card_format(PictFormat format)
274
{
275
{
275
	switch (format) {
276
	switch (format) {
276
	default:
277
	default:
277
		return -1;
278
		return -1;
278
	case PICT_a8r8g8b8:
279
	case PICT_a8r8g8b8:
279
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
280
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
280
	case PICT_x8r8g8b8:
281
	case PICT_x8r8g8b8:
281
		return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
282
		return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
282
	case PICT_a8b8g8r8:
283
	case PICT_a8b8g8r8:
283
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
284
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
284
	case PICT_x8b8g8r8:
285
	case PICT_x8b8g8r8:
285
		return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
286
		return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
286
	case PICT_a2r10g10b10:
287
	case PICT_a2r10g10b10:
287
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
288
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
288
	case PICT_x2r10g10b10:
289
	case PICT_x2r10g10b10:
289
		return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
290
		return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
290
	case PICT_r8g8b8:
291
	case PICT_r8g8b8:
291
		return GEN5_SURFACEFORMAT_R8G8B8_UNORM;
292
		return GEN5_SURFACEFORMAT_R8G8B8_UNORM;
292
	case PICT_r5g6b5:
293
	case PICT_r5g6b5:
293
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
294
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
294
	case PICT_a1r5g5b5:
295
	case PICT_a1r5g5b5:
295
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
296
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
296
	case PICT_a8:
297
	case PICT_a8:
297
		return GEN5_SURFACEFORMAT_A8_UNORM;
298
		return GEN5_SURFACEFORMAT_A8_UNORM;
298
	case PICT_a4r4g4b4:
299
	case PICT_a4r4g4b4:
299
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
300
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
300
	}
301
	}
301
}
302
}
302
 
303
 
303
static uint32_t gen5_get_dest_format(PictFormat format)
304
static uint32_t gen5_get_dest_format(PictFormat format)
304
{
305
{
305
	return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
306
	return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
306
#if 0
307
#if 0
307
	switch (format) {
308
	switch (format) {
308
	default:
309
	default:
309
		return -1;
310
		return -1;
310
	case PICT_a8r8g8b8:
311
	case PICT_a8r8g8b8:
311
	case PICT_x8r8g8b8:
312
	case PICT_x8r8g8b8:
312
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
313
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
313
	case PICT_a8b8g8r8:
314
	case PICT_a8b8g8r8:
314
	case PICT_x8b8g8r8:
315
	case PICT_x8b8g8r8:
315
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
316
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
316
	case PICT_a2r10g10b10:
317
	case PICT_a2r10g10b10:
317
	case PICT_x2r10g10b10:
318
	case PICT_x2r10g10b10:
318
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
319
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
319
	case PICT_r5g6b5:
320
	case PICT_r5g6b5:
320
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
321
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
321
	case PICT_x1r5g5b5:
322
	case PICT_x1r5g5b5:
322
	case PICT_a1r5g5b5:
323
	case PICT_a1r5g5b5:
323
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
324
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
324
	case PICT_a8:
325
	case PICT_a8:
325
		return GEN5_SURFACEFORMAT_A8_UNORM;
326
		return GEN5_SURFACEFORMAT_A8_UNORM;
326
	case PICT_a4r4g4b4:
327
	case PICT_a4r4g4b4:
327
	case PICT_x4r4g4b4:
328
	case PICT_x4r4g4b4:
328
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
329
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
329
	}
330
	}
330
#endif
331
#endif
331
}
332
}
332
typedef struct gen5_surface_state_padded {
333
typedef struct gen5_surface_state_padded {
333
	struct gen5_surface_state state;
334
	struct gen5_surface_state state;
334
	char pad[32 - sizeof(struct gen5_surface_state)];
335
	char pad[32 - sizeof(struct gen5_surface_state)];
335
} gen5_surface_state_padded;
336
} gen5_surface_state_padded;
336
 
337
 
337
static void null_create(struct sna_static_stream *stream)
338
static void null_create(struct sna_static_stream *stream)
338
{
339
{
339
	/* A bunch of zeros useful for legacy border color and depth-stencil */
340
	/* A bunch of zeros useful for legacy border color and depth-stencil */
340
	sna_static_stream_map(stream, 64, 64);
341
	sna_static_stream_map(stream, 64, 64);
341
}
342
}
342
 
343
 
343
static void
344
static void
344
sampler_state_init(struct gen5_sampler_state *sampler_state,
345
sampler_state_init(struct gen5_sampler_state *sampler_state,
345
		   sampler_filter_t filter,
346
		   sampler_filter_t filter,
346
		   sampler_extend_t extend)
347
		   sampler_extend_t extend)
347
{
348
{
348
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
349
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
349
 
350
 
350
	/* We use the legacy mode to get the semantics specified by
351
	/* We use the legacy mode to get the semantics specified by
351
	 * the Render extension. */
352
	 * the Render extension. */
352
	sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;
353
	sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;
353
 
354
 
354
	switch (filter) {
355
	switch (filter) {
355
	default:
356
	default:
356
	case SAMPLER_FILTER_NEAREST:
357
	case SAMPLER_FILTER_NEAREST:
357
		sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
358
		sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
358
		sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
359
		sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
359
		break;
360
		break;
360
	case SAMPLER_FILTER_BILINEAR:
361
	case SAMPLER_FILTER_BILINEAR:
361
		sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
362
		sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
362
		sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
363
		sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
363
		break;
364
		break;
364
	}
365
	}
365
 
366
 
366
	switch (extend) {
367
	switch (extend) {
367
	default:
368
	default:
368
	case SAMPLER_EXTEND_NONE:
369
	case SAMPLER_EXTEND_NONE:
369
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
370
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
370
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
371
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
371
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
372
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
372
		break;
373
		break;
373
	case SAMPLER_EXTEND_REPEAT:
374
	case SAMPLER_EXTEND_REPEAT:
374
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
375
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
375
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
376
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
376
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
377
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
377
		break;
378
		break;
378
	case SAMPLER_EXTEND_PAD:
379
	case SAMPLER_EXTEND_PAD:
379
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
380
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
380
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
381
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
381
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
382
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
382
		break;
383
		break;
383
	case SAMPLER_EXTEND_REFLECT:
384
	case SAMPLER_EXTEND_REFLECT:
384
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
385
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
385
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
386
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
386
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
387
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
387
		break;
388
		break;
388
	}
389
	}
389
}
390
}
390
 
391
 
391
static uint32_t
392
static uint32_t
392
gen5_tiling_bits(uint32_t tiling)
393
gen5_tiling_bits(uint32_t tiling)
393
{
394
{
394
	switch (tiling) {
395
	switch (tiling) {
395
	default: assert(0);
396
	default: assert(0);
396
	case I915_TILING_NONE: return 0;
397
	case I915_TILING_NONE: return 0;
397
	case I915_TILING_X: return GEN5_SURFACE_TILED;
398
	case I915_TILING_X: return GEN5_SURFACE_TILED;
398
	case I915_TILING_Y: return GEN5_SURFACE_TILED | GEN5_SURFACE_TILED_Y;
399
	case I915_TILING_Y: return GEN5_SURFACE_TILED | GEN5_SURFACE_TILED_Y;
399
	}
400
	}
400
}
401
}
401
 
402
 
402
/**
403
/**
403
 * Sets up the common fields for a surface state buffer for the given
404
 * Sets up the common fields for a surface state buffer for the given
404
 * picture in the given surface state buffer.
405
 * picture in the given surface state buffer.
405
 */
406
 */
406
static uint32_t
407
static uint32_t
407
gen5_bind_bo(struct sna *sna,
408
gen5_bind_bo(struct sna *sna,
408
	     struct kgem_bo *bo,
409
	     struct kgem_bo *bo,
409
	     uint32_t width,
410
	     uint32_t width,
410
	     uint32_t height,
411
	     uint32_t height,
411
	     uint32_t format,
412
	     uint32_t format,
412
	     bool is_dst)
413
	     bool is_dst)
413
{
414
{
414
	uint32_t domains;
415
	uint32_t domains;
415
	uint16_t offset;
416
	uint16_t offset;
416
	uint32_t *ss;
417
	uint32_t *ss;
417
 
418
 
418
	/* After the first bind, we manage the cache domains within the batch */
419
	/* After the first bind, we manage the cache domains within the batch */
419
	if (!DBG_NO_SURFACE_CACHE) {
420
	if (!DBG_NO_SURFACE_CACHE) {
420
		offset = kgem_bo_get_binding(bo, format | is_dst << 31);
421
		offset = kgem_bo_get_binding(bo, format | is_dst << 31);
421
		if (offset) {
422
		if (offset) {
422
			if (is_dst)
423
			if (is_dst)
423
				kgem_bo_mark_dirty(bo);
424
				kgem_bo_mark_dirty(bo);
424
			return offset * sizeof(uint32_t);
425
			return offset * sizeof(uint32_t);
425
		}
426
		}
426
	}
427
	}
427
 
428
 
428
	offset = sna->kgem.surface -=
429
	offset = sna->kgem.surface -=
429
		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
430
		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
430
	ss = sna->kgem.batch + offset;
431
	ss = sna->kgem.batch + offset;
431
 
432
 
432
	ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT |
433
	ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT |
433
		 GEN5_SURFACE_BLEND_ENABLED |
434
		 GEN5_SURFACE_BLEND_ENABLED |
434
		 format << GEN5_SURFACE_FORMAT_SHIFT);
435
		 format << GEN5_SURFACE_FORMAT_SHIFT);
435
 
436
 
436
	if (is_dst) {
437
	if (is_dst) {
437
		ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
438
		ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
438
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
439
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
439
	} else
440
	} else
440
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
441
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
441
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
442
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
442
 
443
 
443
	ss[2] = ((width - 1)  << GEN5_SURFACE_WIDTH_SHIFT |
444
	ss[2] = ((width - 1)  << GEN5_SURFACE_WIDTH_SHIFT |
444
		 (height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
445
		 (height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
445
	ss[3] = (gen5_tiling_bits(bo->tiling) |
446
	ss[3] = (gen5_tiling_bits(bo->tiling) |
446
		 (bo->pitch - 1) << GEN5_SURFACE_PITCH_SHIFT);
447
		 (bo->pitch - 1) << GEN5_SURFACE_PITCH_SHIFT);
447
	ss[4] = 0;
448
	ss[4] = 0;
448
	ss[5] = 0;
449
	ss[5] = 0;
449
 
450
 
450
	kgem_bo_set_binding(bo, format | is_dst << 31, offset);
451
	kgem_bo_set_binding(bo, format | is_dst << 31, offset);
451
 
452
 
452
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
453
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
453
	     offset, bo->handle, ss[1],
454
	     offset, bo->handle, ss[1],
454
	     format, width, height, bo->pitch, bo->tiling,
455
	     format, width, height, bo->pitch, bo->tiling,
455
	     domains & 0xffff ? "render" : "sampler"));
456
	     domains & 0xffff ? "render" : "sampler"));
456
 
457
 
457
	return offset * sizeof(uint32_t);
458
	return offset * sizeof(uint32_t);
458
}
459
}
459
 
460
 
460
static void gen5_emit_vertex_buffer(struct sna *sna,
461
static void gen5_emit_vertex_buffer(struct sna *sna,
461
				    const struct sna_composite_op *op)
462
				    const struct sna_composite_op *op)
462
{
463
{
463
	int id = op->u.gen5.ve_id;
464
	int id = op->u.gen5.ve_id;
464
 
465
 
465
	assert((sna->render.vb_id & (1 << id)) == 0);
466
	assert((sna->render.vb_id & (1 << id)) == 0);
466
 
467
 
467
	OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
468
	OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
468
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
469
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
469
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
470
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
470
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
471
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
471
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
472
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
472
	OUT_BATCH(0);
473
	OUT_BATCH(0);
473
	OUT_BATCH(~0); /* max address: disabled */
474
	OUT_BATCH(~0); /* max address: disabled */
474
	OUT_BATCH(0);
475
	OUT_BATCH(0);
475
 
476
 
476
	sna->render.vb_id |= 1 << id;
477
	sna->render.vb_id |= 1 << id;
477
}
478
}
478
 
479
 
479
static void gen5_emit_primitive(struct sna *sna)
480
static void gen5_emit_primitive(struct sna *sna)
480
{
481
{
481
	if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
482
	if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
482
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
483
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
483
		return;
484
		return;
484
	}
485
	}
485
 
486
 
486
	OUT_BATCH(GEN5_3DPRIMITIVE |
487
	OUT_BATCH(GEN5_3DPRIMITIVE |
487
		  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
488
		  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
488
		  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
489
		  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
489
		  (0 << 9) |
490
		  (0 << 9) |
490
		  4);
491
		  4);
491
	sna->render.vertex_offset = sna->kgem.nbatch;
492
	sna->render.vertex_offset = sna->kgem.nbatch;
492
	OUT_BATCH(0);	/* vertex count, to be filled in later */
493
	OUT_BATCH(0);	/* vertex count, to be filled in later */
493
	OUT_BATCH(sna->render.vertex_index);
494
	OUT_BATCH(sna->render.vertex_index);
494
	OUT_BATCH(1);	/* single instance */
495
	OUT_BATCH(1);	/* single instance */
495
	OUT_BATCH(0);	/* start instance location */
496
	OUT_BATCH(0);	/* start instance location */
496
	OUT_BATCH(0);	/* index buffer offset, ignored */
497
	OUT_BATCH(0);	/* index buffer offset, ignored */
497
	sna->render.vertex_start = sna->render.vertex_index;
498
	sna->render.vertex_start = sna->render.vertex_index;
498
 
499
 
499
	sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
500
	sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
500
}
501
}
501
 
502
 
502
static bool gen5_rectangle_begin(struct sna *sna,
503
static bool gen5_rectangle_begin(struct sna *sna,
503
				 const struct sna_composite_op *op)
504
				 const struct sna_composite_op *op)
504
{
505
{
505
	int id = op->u.gen5.ve_id;
506
	int id = op->u.gen5.ve_id;
506
	int ndwords;
507
	int ndwords;
507
 
508
 
508
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
509
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
509
		return true;
510
		return true;
510
 
511
 
511
	ndwords = op->need_magic_ca_pass ? 20 : 6;
512
	ndwords = op->need_magic_ca_pass ? 20 : 6;
512
	if ((sna->render.vb_id & (1 << id)) == 0)
513
	if ((sna->render.vb_id & (1 << id)) == 0)
513
		ndwords += 5;
514
		ndwords += 5;
514
 
515
 
515
	if (!kgem_check_batch(&sna->kgem, ndwords))
516
	if (!kgem_check_batch(&sna->kgem, ndwords))
516
		return false;
517
		return false;
517
 
518
 
518
	if ((sna->render.vb_id & (1 << id)) == 0)
519
	if ((sna->render.vb_id & (1 << id)) == 0)
519
		gen5_emit_vertex_buffer(sna, op);
520
		gen5_emit_vertex_buffer(sna, op);
520
	if (sna->render.vertex_offset == 0)
521
	if (sna->render.vertex_offset == 0)
521
		gen5_emit_primitive(sna);
522
		gen5_emit_primitive(sna);
522
 
523
 
523
	return true;
524
	return true;
524
}
525
}
525
 
526
 
526
static int gen5_get_rectangles__flush(struct sna *sna,
527
static int gen5_get_rectangles__flush(struct sna *sna,
527
				      const struct sna_composite_op *op)
528
				      const struct sna_composite_op *op)
528
{
529
{
529
	/* Preventing discarding new vbo after lock contention */
530
	/* Preventing discarding new vbo after lock contention */
530
	if (sna_vertex_wait__locked(&sna->render)) {
531
	if (sna_vertex_wait__locked(&sna->render)) {
531
		int rem = vertex_space(sna);
532
		int rem = vertex_space(sna);
532
		if (rem > op->floats_per_rect)
533
		if (rem > op->floats_per_rect)
533
			return rem;
534
			return rem;
534
	}
535
	}
535
 
536
 
536
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
537
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
537
		return 0;
538
		return 0;
538
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
539
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
539
		return 0;
540
		return 0;
540
 
541
 
541
	if (sna->render.vertex_offset) {
542
	if (sna->render.vertex_offset) {
542
		gen4_vertex_flush(sna);
543
		gen4_vertex_flush(sna);
543
		if (gen5_magic_ca_pass(sna, op))
544
		if (gen5_magic_ca_pass(sna, op))
544
			gen5_emit_pipelined_pointers(sna, op, op->op,
545
			gen5_emit_pipelined_pointers(sna, op, op->op,
545
						     op->u.gen5.wm_kernel);
546
						     op->u.gen5.wm_kernel);
546
	}
547
	}
547
 
548
 
548
	return gen4_vertex_finish(sna);
549
	return gen4_vertex_finish(sna);
549
}
550
}
550
 
551
 
551
inline static int gen5_get_rectangles(struct sna *sna,
552
inline static int gen5_get_rectangles(struct sna *sna,
552
				      const struct sna_composite_op *op,
553
				      const struct sna_composite_op *op,
553
				      int want,
554
				      int want,
554
				      void (*emit_state)(struct sna *sna,
555
				      void (*emit_state)(struct sna *sna,
555
							 const struct sna_composite_op *op))
556
							 const struct sna_composite_op *op))
556
{
557
{
557
	int rem;
558
	int rem;
558
 
559
 
559
	assert(want);
560
	assert(want);
560
 
561
 
561
start:
562
start:
562
	rem = vertex_space(sna);
563
	rem = vertex_space(sna);
563
	if (unlikely(rem < op->floats_per_rect)) {
564
	if (unlikely(rem < op->floats_per_rect)) {
564
		DBG(("flushing vbo for %s: %d < %d\n",
565
		DBG(("flushing vbo for %s: %d < %d\n",
565
		     __FUNCTION__, rem, op->floats_per_rect));
566
		     __FUNCTION__, rem, op->floats_per_rect));
566
		rem = gen5_get_rectangles__flush(sna, op);
567
		rem = gen5_get_rectangles__flush(sna, op);
567
		if (unlikely (rem == 0))
568
		if (unlikely (rem == 0))
568
			goto flush;
569
			goto flush;
569
	}
570
	}
570
 
571
 
571
	if (unlikely(sna->render.vertex_offset == 0)) {
572
	if (unlikely(sna->render.vertex_offset == 0)) {
572
		if (!gen5_rectangle_begin(sna, op))
573
		if (!gen5_rectangle_begin(sna, op))
573
			goto flush;
574
			goto flush;
574
		else
575
		else
575
			goto start;
576
			goto start;
576
	}
577
	}
577
 
578
 
578
	assert(rem <= vertex_space(sna));
579
	assert(rem <= vertex_space(sna));
579
	assert(op->floats_per_rect <= rem);
580
	assert(op->floats_per_rect <= rem);
580
	if (want > 1 && want * op->floats_per_rect > rem)
581
	if (want > 1 && want * op->floats_per_rect > rem)
581
		want = rem / op->floats_per_rect;
582
		want = rem / op->floats_per_rect;
582
 
583
 
583
	sna->render.vertex_index += 3*want;
584
	sna->render.vertex_index += 3*want;
584
	return want;
585
	return want;
585
 
586
 
586
flush:
587
flush:
587
	if (sna->render.vertex_offset) {
588
	if (sna->render.vertex_offset) {
588
		gen4_vertex_flush(sna);
589
		gen4_vertex_flush(sna);
589
		gen5_magic_ca_pass(sna, op);
590
		gen5_magic_ca_pass(sna, op);
590
	}
591
	}
591
	sna_vertex_wait__locked(&sna->render);
592
	sna_vertex_wait__locked(&sna->render);
592
	_kgem_submit(&sna->kgem);
593
	_kgem_submit(&sna->kgem);
593
	emit_state(sna, op);
594
	emit_state(sna, op);
594
	goto start;
595
	goto start;
595
}
596
}
596
 
597
 
597
static uint32_t *
598
static uint32_t *
598
gen5_composite_get_binding_table(struct sna *sna,
599
gen5_composite_get_binding_table(struct sna *sna,
599
				 uint16_t *offset)
600
				 uint16_t *offset)
600
{
601
{
601
	sna->kgem.surface -=
602
	sna->kgem.surface -=
602
		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
603
		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
603
 
604
 
604
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
605
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
605
 
606
 
606
	/* Clear all surplus entries to zero in case of prefetch */
607
	/* Clear all surplus entries to zero in case of prefetch */
607
	*offset = sna->kgem.surface;
608
	*offset = sna->kgem.surface;
608
	return memset(sna->kgem.batch + sna->kgem.surface,
609
	return memset(sna->kgem.batch + sna->kgem.surface,
609
		      0, sizeof(struct gen5_surface_state_padded));
610
		      0, sizeof(struct gen5_surface_state_padded));
610
}
611
}
611
 
612
 
612
static void
613
static void
613
gen5_emit_urb(struct sna *sna)
614
gen5_emit_urb(struct sna *sna)
614
{
615
{
615
	int urb_vs_start, urb_vs_size;
616
	int urb_vs_start, urb_vs_size;
616
	int urb_gs_start, urb_gs_size;
617
	int urb_gs_start, urb_gs_size;
617
	int urb_clip_start, urb_clip_size;
618
	int urb_clip_start, urb_clip_size;
618
	int urb_sf_start, urb_sf_size;
619
	int urb_sf_start, urb_sf_size;
619
	int urb_cs_start, urb_cs_size;
620
	int urb_cs_start, urb_cs_size;
620
 
621
 
621
	urb_vs_start = 0;
622
	urb_vs_start = 0;
622
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
623
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
623
	urb_gs_start = urb_vs_start + urb_vs_size;
624
	urb_gs_start = urb_vs_start + urb_vs_size;
624
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
625
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
625
	urb_clip_start = urb_gs_start + urb_gs_size;
626
	urb_clip_start = urb_gs_start + urb_gs_size;
626
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
627
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
627
	urb_sf_start = urb_clip_start + urb_clip_size;
628
	urb_sf_start = urb_clip_start + urb_clip_size;
628
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
629
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
629
	urb_cs_start = urb_sf_start + urb_sf_size;
630
	urb_cs_start = urb_sf_start + urb_sf_size;
630
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
631
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
631
 
632
 
632
	OUT_BATCH(GEN5_URB_FENCE |
633
	OUT_BATCH(GEN5_URB_FENCE |
633
		  UF0_CS_REALLOC |
634
		  UF0_CS_REALLOC |
634
		  UF0_SF_REALLOC |
635
		  UF0_SF_REALLOC |
635
		  UF0_CLIP_REALLOC |
636
		  UF0_CLIP_REALLOC |
636
		  UF0_GS_REALLOC |
637
		  UF0_GS_REALLOC |
637
		  UF0_VS_REALLOC |
638
		  UF0_VS_REALLOC |
638
		  1);
639
		  1);
639
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
640
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
640
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
641
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
641
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
642
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
642
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
643
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
643
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
644
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
644
 
645
 
645
	/* Constant buffer state */
646
	/* Constant buffer state */
646
	OUT_BATCH(GEN5_CS_URB_STATE | 0);
647
	OUT_BATCH(GEN5_CS_URB_STATE | 0);
647
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
648
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
648
}
649
}
649
 
650
 
650
static void
651
static void
651
gen5_emit_state_base_address(struct sna *sna)
652
gen5_emit_state_base_address(struct sna *sna)
652
{
653
{
653
	assert(sna->render_state.gen5.general_bo->proxy == NULL);
654
	assert(sna->render_state.gen5.general_bo->proxy == NULL);
654
	OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
655
	OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
655
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
656
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
656
				 sna->kgem.nbatch,
657
				 sna->kgem.nbatch,
657
				 sna->render_state.gen5.general_bo,
658
				 sna->render_state.gen5.general_bo,
658
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
659
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
659
				 BASE_ADDRESS_MODIFY));
660
				 BASE_ADDRESS_MODIFY));
660
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
661
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
661
				 sna->kgem.nbatch,
662
				 sna->kgem.nbatch,
662
				 NULL,
663
				 NULL,
663
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
664
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
664
				 BASE_ADDRESS_MODIFY));
665
				 BASE_ADDRESS_MODIFY));
665
	OUT_BATCH(0); /* media */
666
	OUT_BATCH(0); /* media */
666
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
667
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
667
				 sna->kgem.nbatch,
668
				 sna->kgem.nbatch,
668
				 sna->render_state.gen5.general_bo,
669
				 sna->render_state.gen5.general_bo,
669
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
670
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
670
				 BASE_ADDRESS_MODIFY));
671
				 BASE_ADDRESS_MODIFY));
671
 
672
 
672
	/* upper bounds, all disabled */
673
	/* upper bounds, all disabled */
673
	OUT_BATCH(BASE_ADDRESS_MODIFY);
674
	OUT_BATCH(BASE_ADDRESS_MODIFY);
674
	OUT_BATCH(0);
675
	OUT_BATCH(0);
675
	OUT_BATCH(BASE_ADDRESS_MODIFY);
676
	OUT_BATCH(BASE_ADDRESS_MODIFY);
676
}
677
}
677
 
678
 
678
static void
679
static void
679
gen5_emit_invariant(struct sna *sna)
680
gen5_emit_invariant(struct sna *sna)
680
{
681
{
681
	/* Ironlake errata workaround: Before disabling the clipper,
682
	/* Ironlake errata workaround: Before disabling the clipper,
682
	 * you have to MI_FLUSH to get the pipeline idle.
683
	 * you have to MI_FLUSH to get the pipeline idle.
683
	 *
684
	 *
684
	 * However, the kernel flushes the pipeline between batches,
685
	 * However, the kernel flushes the pipeline between batches,
685
	 * so we should be safe....
686
	 * so we should be safe....
686
	 *
687
	 *
687
	 * On the other hand, after using BLT we must use a non-pipelined
688
	 * On the other hand, after using BLT we must use a non-pipelined
688
	 * operation...
689
	 * operation...
689
	 */
690
	 */
690
	if (sna->kgem.nreloc)
691
	if (sna->kgem.nreloc)
691
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
692
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
692
 
693
 
693
	OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
694
	OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
694
 
695
 
695
	gen5_emit_state_base_address(sna);
696
	gen5_emit_state_base_address(sna);
696
 
697
 
697
	sna->render_state.gen5.needs_invariant = false;
698
	sna->render_state.gen5.needs_invariant = false;
698
}
699
}
699
 
700
 
700
static void
701
static void
701
gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
702
gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
702
{
703
{
703
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
704
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
704
 
705
 
705
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
706
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
706
		DBG(("%s: flushing batch: %d < %d+%d\n",
707
		DBG(("%s: flushing batch: %d < %d+%d\n",
707
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
708
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
708
		     150, 4*8));
709
		     150, 4*8));
709
		kgem_submit(&sna->kgem);
710
		kgem_submit(&sna->kgem);
710
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
711
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
711
	}
712
	}
712
 
713
 
713
	if (sna->render_state.gen5.needs_invariant)
714
	if (sna->render_state.gen5.needs_invariant)
714
		gen5_emit_invariant(sna);
715
		gen5_emit_invariant(sna);
715
}
716
}
716
 
717
 
717
static void
718
static void
718
gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
719
gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
719
{
720
{
720
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
721
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
721
	if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
722
	if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
722
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
-
 
723
			gen4_vertex_finish(sna);
-
 
724
 
-
 
725
		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
723
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
726
		     sna->render_state.gen5.floats_per_vertex,
724
		     sna->render_state.gen5.floats_per_vertex,
727
		     op->floats_per_vertex,
725
		     op->floats_per_vertex));
728
		     sna->render.vertex_index,
726
		gen4_vertex_align(sna, op);
729
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
-
 
730
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
-
 
731
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
-
 
732
		sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
727
		sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
733
	}
728
	}
734
}
729
}
735
 
730
 
736
static void
731
static void
737
gen5_emit_binding_table(struct sna *sna, uint16_t offset)
732
gen5_emit_binding_table(struct sna *sna, uint16_t offset)
738
{
733
{
739
	if (!DBG_NO_STATE_CACHE &&
734
	if (!DBG_NO_STATE_CACHE &&
740
	    sna->render_state.gen5.surface_table == offset)
735
	    sna->render_state.gen5.surface_table == offset)
741
		return;
736
		return;
742
 
737
 
743
	sna->render_state.gen5.surface_table = offset;
738
	sna->render_state.gen5.surface_table = offset;
744
 
739
 
745
	/* Binding table pointers */
740
	/* Binding table pointers */
746
	OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
741
	OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
747
	OUT_BATCH(0);		/* vs */
742
	OUT_BATCH(0);		/* vs */
748
	OUT_BATCH(0);		/* gs */
743
	OUT_BATCH(0);		/* gs */
749
	OUT_BATCH(0);		/* clip */
744
	OUT_BATCH(0);		/* clip */
750
	OUT_BATCH(0);		/* sf */
745
	OUT_BATCH(0);		/* sf */
751
	/* Only the PS uses the binding table */
746
	/* Only the PS uses the binding table */
752
	OUT_BATCH(offset*4);
747
	OUT_BATCH(offset*4);
753
}
748
}
754
 
749
 
755
static bool
750
static bool
756
gen5_emit_pipelined_pointers(struct sna *sna,
751
gen5_emit_pipelined_pointers(struct sna *sna,
757
			     const struct sna_composite_op *op,
752
			     const struct sna_composite_op *op,
758
			     int blend, int kernel)
753
			     int blend, int kernel)
759
{
754
{
760
	uint16_t sp, bp;
755
	uint16_t sp, bp;
761
	uint32_t key;
756
	uint32_t key;
762
 
757
 
763
	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
758
	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
764
	     __FUNCTION__, op->u.gen5.ve_id & 2,
759
	     __FUNCTION__, op->u.gen5.ve_id & 2,
765
	     op->src.filter, op->src.repeat,
760
	     op->src.filter, op->src.repeat,
766
	     op->mask.filter, op->mask.repeat,
761
	     op->mask.filter, op->mask.repeat,
767
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));
762
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));
768
 
763
 
769
	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
764
	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
770
			    op->mask.filter, op->mask.repeat,
765
			    op->mask.filter, op->mask.repeat,
771
			    kernel);
766
			    kernel);
772
	bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
767
	bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
773
 
768
 
774
	key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
769
	key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
775
	DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
770
	DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
776
	     __FUNCTION__, sp, bp, key,
771
	     __FUNCTION__, sp, bp, key,
777
	     sna->render_state.gen5.last_pipelined_pointers & 0xffff,
772
	     sna->render_state.gen5.last_pipelined_pointers & 0xffff,
778
	     (sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
773
	     (sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
779
	     sna->render_state.gen5.last_pipelined_pointers));
774
	     sna->render_state.gen5.last_pipelined_pointers));
780
	if (key == sna->render_state.gen5.last_pipelined_pointers)
775
	if (key == sna->render_state.gen5.last_pipelined_pointers)
781
		return false;
776
		return false;
782
 
777
 
783
	OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
778
	OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
784
	OUT_BATCH(sna->render_state.gen5.vs);
779
	OUT_BATCH(sna->render_state.gen5.vs);
785
	OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
780
	OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
786
	OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
781
	OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
787
	OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
782
	OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
788
	OUT_BATCH(sna->render_state.gen5.wm + sp);
783
	OUT_BATCH(sna->render_state.gen5.wm + sp);
789
	OUT_BATCH(sna->render_state.gen5.cc + bp);
784
	OUT_BATCH(sna->render_state.gen5.cc + bp);
790
 
785
 
791
	bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
786
	bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
792
	sna->render_state.gen5.last_pipelined_pointers = key;
787
	sna->render_state.gen5.last_pipelined_pointers = key;
793
 
788
 
794
	gen5_emit_urb(sna);
789
	gen5_emit_urb(sna);
795
 
790
 
796
	return bp;
791
	return bp;
797
}
792
}
798
 
793
 
799
static bool
794
static bool
800
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
795
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
801
{
796
{
802
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
797
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
803
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
798
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
804
 
799
 
805
	assert(!too_large(op->dst.x, op->dst.y));
800
	assert(!too_large(op->dst.x, op->dst.y));
806
	assert(!too_large(op->dst.width, op->dst.height));
801
	assert(!too_large(op->dst.width, op->dst.height));
807
 
802
 
808
	if (!DBG_NO_STATE_CACHE &&
803
	if (!DBG_NO_STATE_CACHE &&
809
	    sna->render_state.gen5.drawrect_limit == limit &&
804
	    sna->render_state.gen5.drawrect_limit == limit &&
810
	    sna->render_state.gen5.drawrect_offset == offset)
805
	    sna->render_state.gen5.drawrect_offset == offset)
811
		return false;
806
		return false;
812
 
807
 
813
	sna->render_state.gen5.drawrect_offset = offset;
808
	sna->render_state.gen5.drawrect_offset = offset;
814
	sna->render_state.gen5.drawrect_limit = limit;
809
	sna->render_state.gen5.drawrect_limit = limit;
815
 
810
 
816
	OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
811
	OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
817
	OUT_BATCH(0x00000000);
812
	OUT_BATCH(0x00000000);
818
	OUT_BATCH(limit);
813
	OUT_BATCH(limit);
819
	OUT_BATCH(offset);
814
	OUT_BATCH(offset);
820
	return true;
815
	return true;
821
}
816
}
822
 
817
 
823
static void
818
static void
824
gen5_emit_vertex_elements(struct sna *sna,
819
gen5_emit_vertex_elements(struct sna *sna,
825
			  const struct sna_composite_op *op)
820
			  const struct sna_composite_op *op)
826
{
821
{
827
	/*
822
	/*
828
	 * vertex data in vertex buffer
823
	 * vertex data in vertex buffer
829
	 *    position: (x, y)
824
	 *    position: (x, y)
830
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
825
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
831
	 *    texture coordinate 1 if (has_mask is true): same as above
826
	 *    texture coordinate 1 if (has_mask is true): same as above
832
	 */
827
	 */
833
	struct gen5_render_state *render = &sna->render_state.gen5;
828
	struct gen5_render_state *render = &sna->render_state.gen5;
834
	int id = op->u.gen5.ve_id;
829
	int id = op->u.gen5.ve_id;
835
	bool has_mask = id >> 2;
830
	bool has_mask = id >> 2;
836
	uint32_t format, dw;
831
	uint32_t format, dw;
837
 
832
 
838
	if (!DBG_NO_STATE_CACHE && render->ve_id == id)
833
	if (!DBG_NO_STATE_CACHE && render->ve_id == id)
839
		return;
834
		return;
840
 
835
 
841
	DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
836
	DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
842
	render->ve_id = id;
837
	render->ve_id = id;
843
 
838
 
844
	/* The VUE layout
839
	/* The VUE layout
845
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
840
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
846
	 *    dword 4-7: position (x, y, 1.0, 1.0),
841
	 *    dword 4-7: position (x, y, 1.0, 1.0),
847
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
842
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
848
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
843
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
849
	 *
844
	 *
850
	 * dword 4-15 are fetched from vertex buffer
845
	 * dword 4-15 are fetched from vertex buffer
851
	 */
846
	 */
852
	OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
847
	OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
853
		((2 * (has_mask ? 4 : 3)) + 1 - 2));
848
		((2 * (has_mask ? 4 : 3)) + 1 - 2));
854
 
849
 
855
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
850
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
856
		  (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
851
		  (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
857
		  (0 << VE0_OFFSET_SHIFT));
852
		  (0 << VE0_OFFSET_SHIFT));
858
	OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
853
	OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
859
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
854
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
860
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
855
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
861
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
856
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
862
 
857
 
863
	/* x,y */
858
	/* x,y */
864
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
859
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
865
		  GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
860
		  GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
866
		  0 << VE0_OFFSET_SHIFT);
861
		  0 << VE0_OFFSET_SHIFT);
867
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
862
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
868
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
863
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
869
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
864
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
870
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
865
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
871
 
866
 
872
	/* u0, v0, w0 */
867
	/* u0, v0, w0 */
873
	DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
868
	DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
874
	     id, id & 3));
869
	     id, id & 3));
875
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
870
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
876
	switch (id & 3) {
871
	switch (id & 3) {
877
	default:
872
	default:
878
		assert(0);
873
		assert(0);
879
	case 0:
874
	case 0:
880
		format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
875
		format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
881
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
876
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
882
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
877
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
883
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
878
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
884
		break;
879
		break;
885
	case 1:
880
	case 1:
886
		format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
881
		format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
887
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
882
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
888
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
883
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
889
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
884
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
890
		break;
885
		break;
891
	case 2:
886
	case 2:
892
		format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
887
		format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
893
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
888
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
894
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
889
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
895
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
890
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
896
		break;
891
		break;
897
	case 3:
892
	case 3:
898
		format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
893
		format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
899
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
894
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
900
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
895
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
901
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
896
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
902
		break;
897
		break;
903
	}
898
	}
904
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
899
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
905
		  format | 4 << VE0_OFFSET_SHIFT);
900
		  format | 4 << VE0_OFFSET_SHIFT);
906
	OUT_BATCH(dw);
901
	OUT_BATCH(dw);
907
 
902
 
908
	/* u1, v1, w1 */
903
	/* u1, v1, w1 */
909
	if (has_mask) {
904
	if (has_mask) {
910
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
905
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
911
		DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
906
		DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
912
		     id, id >> 2, offset));
907
		     id, id >> 2, offset));
913
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
908
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
914
		switch (id >> 2) {
909
		switch (id >> 2) {
915
		case 1:
910
		case 1:
916
			format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
911
			format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
917
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
912
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
918
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
913
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
919
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
914
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
920
			break;
915
			break;
921
		default:
916
		default:
922
			assert(0);
917
			assert(0);
923
		case 2:
918
		case 2:
924
			format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
919
			format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
925
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
920
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
926
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
921
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
927
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
922
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
928
			break;
923
			break;
929
		case 3:
924
		case 3:
930
			format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
925
			format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
931
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
926
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
932
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
927
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
933
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
928
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
934
			break;
929
			break;
935
		}
930
		}
936
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
931
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
937
			  format | offset << VE0_OFFSET_SHIFT);
932
			  format | offset << VE0_OFFSET_SHIFT);
938
		OUT_BATCH(dw);
933
		OUT_BATCH(dw);
939
	}
934
	}
940
}
935
}
941
 
936
 
942
inline static void
937
inline static void
943
gen5_emit_pipe_flush(struct sna *sna)
938
gen5_emit_pipe_flush(struct sna *sna)
944
{
939
{
-
 
940
#if 0
945
	OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
941
	OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
946
	OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
942
	OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
947
	OUT_BATCH(0);
943
	OUT_BATCH(0);
948
	OUT_BATCH(0);
944
	OUT_BATCH(0);
-
 
945
#else
-
 
946
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
-
 
947
#endif
949
}
948
}
950
 
949
 
951
static void
950
static void
952
gen5_emit_state(struct sna *sna,
951
gen5_emit_state(struct sna *sna,
953
		const struct sna_composite_op *op,
952
		const struct sna_composite_op *op,
954
		uint16_t offset)
953
		uint16_t offset)
955
{
954
{
956
	bool flush = false;
955
	bool flush = false;
957
 
956
 
958
	assert(op->dst.bo->exec);
957
	assert(op->dst.bo->exec);
959
 
958
 
960
	/* drawrect must be first for Ironlake BLT workaround */
959
	/* drawrect must be first for Ironlake BLT workaround */
961
	if (gen5_emit_drawing_rectangle(sna, op))
960
	if (gen5_emit_drawing_rectangle(sna, op))
962
		offset &= ~1;
961
		offset &= ~1;
963
	gen5_emit_binding_table(sna, offset & ~1);
962
	gen5_emit_binding_table(sna, offset & ~1);
964
	if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){
963
	if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){
965
		DBG(("%s: changed blend state, flush required? %d\n",
964
		DBG(("%s: changed blend state, flush required? %d\n",
966
		     __FUNCTION__, (offset & 1) && op->op > PictOpSrc));
965
		     __FUNCTION__, (offset & 1) && op->op > PictOpSrc));
967
		flush = (offset & 1) && op->op > PictOpSrc;
966
		flush = (offset & 1) && op->op > PictOpSrc;
968
	}
967
	}
969
	gen5_emit_vertex_elements(sna, op);
968
	gen5_emit_vertex_elements(sna, op);
970
 
969
 
971
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
970
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
972
		DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
971
		DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
973
		     kgem_bo_is_dirty(op->src.bo),
972
		     kgem_bo_is_dirty(op->src.bo),
974
		     kgem_bo_is_dirty(op->mask.bo)));
973
		     kgem_bo_is_dirty(op->mask.bo)));
975
		OUT_BATCH(MI_FLUSH);
974
		OUT_BATCH(MI_FLUSH);
976
		kgem_clear_dirty(&sna->kgem);
975
		kgem_clear_dirty(&sna->kgem);
977
		kgem_bo_mark_dirty(op->dst.bo);
976
		kgem_bo_mark_dirty(op->dst.bo);
978
		flush = false;
977
		flush = false;
979
	}
978
	}
980
	if (flush) {
979
	if (flush) {
981
		DBG(("%s: forcing flush\n", __FUNCTION__));
980
		DBG(("%s: forcing flush\n", __FUNCTION__));
982
		gen5_emit_pipe_flush(sna);
981
		gen5_emit_pipe_flush(sna);
983
	}
982
	}
984
}
983
}
985
 
984
 
986
static void gen5_bind_surfaces(struct sna *sna,
985
static void gen5_bind_surfaces(struct sna *sna,
987
			       const struct sna_composite_op *op)
986
			       const struct sna_composite_op *op)
988
{
987
{
989
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
988
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
990
	uint32_t *binding_table;
989
	uint32_t *binding_table;
991
	uint16_t offset;
990
	uint16_t offset;
992
 
991
 
993
	gen5_get_batch(sna, op);
992
	gen5_get_batch(sna, op);
994
 
993
 
995
	binding_table = gen5_composite_get_binding_table(sna, &offset);
994
	binding_table = gen5_composite_get_binding_table(sna, &offset);
996
 
995
 
997
	binding_table[0] =
996
	binding_table[0] =
998
		gen5_bind_bo(sna,
997
		gen5_bind_bo(sna,
999
			    op->dst.bo, op->dst.width, op->dst.height,
998
			    op->dst.bo, op->dst.width, op->dst.height,
1000
			    gen5_get_dest_format(op->dst.format),
999
			    gen5_get_dest_format(op->dst.format),
1001
			    true);
1000
			    true);
1002
	binding_table[1] =
1001
	binding_table[1] =
1003
		gen5_bind_bo(sna,
1002
		gen5_bind_bo(sna,
1004
			     op->src.bo, op->src.width, op->src.height,
1003
			     op->src.bo, op->src.width, op->src.height,
1005
			     op->src.card_format,
1004
			     op->src.card_format,
1006
			     false);
1005
			     false);
1007
	if (op->mask.bo) {
1006
	if (op->mask.bo) {
1008
		assert(op->u.gen5.ve_id >> 2);
1007
		assert(op->u.gen5.ve_id >> 2);
1009
		binding_table[2] =
1008
		binding_table[2] =
1010
			gen5_bind_bo(sna,
1009
			gen5_bind_bo(sna,
1011
				     op->mask.bo,
1010
				     op->mask.bo,
1012
				     op->mask.width,
1011
				     op->mask.width,
1013
				     op->mask.height,
1012
				     op->mask.height,
1014
				     op->mask.card_format,
1013
				     op->mask.card_format,
1015
				     false);
1014
				     false);
1016
	}
1015
	}
1017
 
1016
 
1018
	if (sna->kgem.surface == offset &&
1017
	if (sna->kgem.surface == offset &&
1019
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
1018
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
1020
	    (op->mask.bo == NULL ||
1019
	    (op->mask.bo == NULL ||
1021
	     sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
1020
	     sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
1022
		sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
1021
		sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
1023
		offset = sna->render_state.gen5.surface_table;
1022
		offset = sna->render_state.gen5.surface_table;
1024
	}
1023
	}
1025
 
1024
 
1026
	gen5_emit_state(sna, op, offset | dirty);
1025
	gen5_emit_state(sna, op, offset | dirty);
1027
}
1026
}
1028
 
1027
 
1029
fastcall static void
1028
fastcall static void
1030
gen5_render_composite_blt(struct sna *sna,
1029
gen5_render_composite_blt(struct sna *sna,
1031
			  const struct sna_composite_op *op,
1030
			  const struct sna_composite_op *op,
1032
			  const struct sna_composite_rectangles *r)
1031
			  const struct sna_composite_rectangles *r)
1033
{
1032
{
1034
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
1033
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
1035
	     __FUNCTION__,
1034
	     __FUNCTION__,
1036
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
1035
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
1037
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
1036
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
1038
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
1037
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
1039
	     r->width, r->height));
1038
	     r->width, r->height));
1040
 
1039
 
1041
	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
1040
	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
1042
	op->prim_emit(sna, op, r);
1041
	op->prim_emit(sna, op, r);
1043
}
1042
}
1044
 
1043
 
1045
#if 0
1044
#if 0
1046
fastcall static void
1045
fastcall static void
1047
gen5_render_composite_box(struct sna *sna,
1046
gen5_render_composite_box(struct sna *sna,
1048
			  const struct sna_composite_op *op,
1047
			  const struct sna_composite_op *op,
1049
			  const BoxRec *box)
1048
			  const BoxRec *box)
1050
{
1049
{
1051
	struct sna_composite_rectangles r;
1050
	struct sna_composite_rectangles r;
1052
 
1051
 
1053
	DBG(("  %s: (%d, %d), (%d, %d)\n",
1052
	DBG(("  %s: (%d, %d), (%d, %d)\n",
1054
	     __FUNCTION__,
1053
	     __FUNCTION__,
1055
	     box->x1, box->y1, box->x2, box->y2));
1054
	     box->x1, box->y1, box->x2, box->y2));
1056
 
1055
 
1057
	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
1056
	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
1058
 
1057
 
1059
	r.dst.x = box->x1;
1058
	r.dst.x = box->x1;
1060
	r.dst.y = box->y1;
1059
	r.dst.y = box->y1;
1061
	r.width  = box->x2 - box->x1;
1060
	r.width  = box->x2 - box->x1;
1062
	r.height = box->y2 - box->y1;
1061
	r.height = box->y2 - box->y1;
1063
	r.mask = r.src = r.dst;
1062
	r.mask = r.src = r.dst;
1064
 
1063
 
1065
	op->prim_emit(sna, op, &r);
1064
	op->prim_emit(sna, op, &r);
1066
}
1065
}
1067
 
1066
 
1068
static void
1067
static void
1069
gen5_render_composite_boxes__blt(struct sna *sna,
1068
gen5_render_composite_boxes__blt(struct sna *sna,
1070
				 const struct sna_composite_op *op,
1069
				 const struct sna_composite_op *op,
1071
				 const BoxRec *box, int nbox)
1070
				 const BoxRec *box, int nbox)
1072
{
1071
{
1073
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
1072
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
1074
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
1073
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
1075
	     op->src.offset[0], op->src.offset[1],
1074
	     op->src.offset[0], op->src.offset[1],
1076
	     op->src.width, op->src.height,
1075
	     op->src.width, op->src.height,
1077
	     op->mask.offset[0], op->mask.offset[1],
1076
	     op->mask.offset[0], op->mask.offset[1],
1078
	     op->mask.width, op->mask.height));
1077
	     op->mask.width, op->mask.height));
1079
 
1078
 
1080
	do {
1079
	do {
1081
		int nbox_this_time;
1080
		int nbox_this_time;
1082
 
1081
 
1083
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
1082
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
1084
						     gen5_bind_surfaces);
1083
						     gen5_bind_surfaces);
1085
		nbox -= nbox_this_time;
1084
		nbox -= nbox_this_time;
1086
 
1085
 
1087
		do {
1086
		do {
1088
			struct sna_composite_rectangles r;
1087
			struct sna_composite_rectangles r;
1089
 
1088
 
1090
			DBG(("  %s: (%d, %d), (%d, %d)\n",
1089
			DBG(("  %s: (%d, %d), (%d, %d)\n",
1091
			     __FUNCTION__,
1090
			     __FUNCTION__,
1092
			     box->x1, box->y1, box->x2, box->y2));
1091
			     box->x1, box->y1, box->x2, box->y2));
1093
 
1092
 
1094
			r.dst.x = box->x1;
1093
			r.dst.x = box->x1;
1095
			r.dst.y = box->y1;
1094
			r.dst.y = box->y1;
1096
			r.width  = box->x2 - box->x1;
1095
			r.width  = box->x2 - box->x1;
1097
			r.height = box->y2 - box->y1;
1096
			r.height = box->y2 - box->y1;
1098
			r.mask = r.src = r.dst;
1097
			r.mask = r.src = r.dst;
1099
			op->prim_emit(sna, op, &r);
1098
			op->prim_emit(sna, op, &r);
1100
			box++;
1099
			box++;
1101
		} while (--nbox_this_time);
1100
		} while (--nbox_this_time);
1102
	} while (nbox);
1101
	} while (nbox);
1103
}
1102
}
1104
 
1103
 
1105
static void
1104
static void
1106
gen5_render_composite_boxes(struct sna *sna,
1105
gen5_render_composite_boxes(struct sna *sna,
1107
			    const struct sna_composite_op *op,
1106
			    const struct sna_composite_op *op,
1108
			    const BoxRec *box, int nbox)
1107
			    const BoxRec *box, int nbox)
1109
{
1108
{
1110
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1109
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1111
 
1110
 
1112
	do {
1111
	do {
1113
		int nbox_this_time;
1112
		int nbox_this_time;
1114
		float *v;
1113
		float *v;
1115
 
1114
 
1116
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
1115
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
1117
						     gen5_bind_surfaces);
1116
						     gen5_bind_surfaces);
1118
		assert(nbox_this_time);
1117
		assert(nbox_this_time);
1119
		nbox -= nbox_this_time;
1118
		nbox -= nbox_this_time;
1120
 
1119
 
1121
		v = sna->render.vertices + sna->render.vertex_used;
1120
		v = sna->render.vertices + sna->render.vertex_used;
1122
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1121
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1123
 
1122
 
1124
		op->emit_boxes(op, box, nbox_this_time, v);
1123
		op->emit_boxes(op, box, nbox_this_time, v);
1125
		box += nbox_this_time;
1124
		box += nbox_this_time;
1126
	} while (nbox);
1125
	} while (nbox);
1127
}
1126
}
1128
 
1127
 
1129
static void
1128
static void
1130
gen5_render_composite_boxes__thread(struct sna *sna,
1129
gen5_render_composite_boxes__thread(struct sna *sna,
1131
				    const struct sna_composite_op *op,
1130
				    const struct sna_composite_op *op,
1132
				    const BoxRec *box, int nbox)
1131
				    const BoxRec *box, int nbox)
1133
{
1132
{
1134
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1133
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1135
 
1134
 
1136
	sna_vertex_lock(&sna->render);
1135
	sna_vertex_lock(&sna->render);
1137
	do {
1136
	do {
1138
		int nbox_this_time;
1137
		int nbox_this_time;
1139
		float *v;
1138
		float *v;
1140
 
1139
 
1141
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
1140
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
1142
						     gen5_bind_surfaces);
1141
						     gen5_bind_surfaces);
1143
		assert(nbox_this_time);
1142
		assert(nbox_this_time);
1144
		nbox -= nbox_this_time;
1143
		nbox -= nbox_this_time;
1145
 
1144
 
1146
		v = sna->render.vertices + sna->render.vertex_used;
1145
		v = sna->render.vertices + sna->render.vertex_used;
1147
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1146
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1148
 
1147
 
1149
		sna_vertex_acquire__locked(&sna->render);
1148
		sna_vertex_acquire__locked(&sna->render);
1150
		sna_vertex_unlock(&sna->render);
1149
		sna_vertex_unlock(&sna->render);
1151
 
1150
 
1152
		op->emit_boxes(op, box, nbox_this_time, v);
1151
		op->emit_boxes(op, box, nbox_this_time, v);
1153
		box += nbox_this_time;
1152
		box += nbox_this_time;
1154
 
1153
 
1155
		sna_vertex_lock(&sna->render);
1154
		sna_vertex_lock(&sna->render);
1156
		sna_vertex_release__locked(&sna->render);
1155
		sna_vertex_release__locked(&sna->render);
1157
	} while (nbox);
1156
	} while (nbox);
1158
	sna_vertex_unlock(&sna->render);
1157
	sna_vertex_unlock(&sna->render);
1159
}
1158
}
1160
 
1159
 
1161
#ifndef MAX
1160
#ifndef MAX
1162
#define MAX(a,b) ((a) > (b) ? (a) : (b))
1161
#define MAX(a,b) ((a) > (b) ? (a) : (b))
1163
#endif
1162
#endif
1164
 
1163
 
1165
static uint32_t gen5_bind_video_source(struct sna *sna,
1164
static uint32_t gen5_bind_video_source(struct sna *sna,
1166
				       struct kgem_bo *src_bo,
1165
				       struct kgem_bo *src_bo,
1167
				       uint32_t src_offset,
1166
				       uint32_t src_offset,
1168
				       int src_width,
1167
				       int src_width,
1169
				       int src_height,
1168
				       int src_height,
1170
				       int src_pitch,
1169
				       int src_pitch,
1171
				       uint32_t src_surf_format)
1170
				       uint32_t src_surf_format)
1172
{
1171
{
1173
	struct gen5_surface_state *ss;
1172
	struct gen5_surface_state *ss;
1174
 
1173
 
1175
	sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
1174
	sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
1176
 
1175
 
1177
	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
1176
	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
1178
	ss->ss0.surface_type = GEN5_SURFACE_2D;
1177
	ss->ss0.surface_type = GEN5_SURFACE_2D;
1179
	ss->ss0.surface_format = src_surf_format;
1178
	ss->ss0.surface_format = src_surf_format;
1180
	ss->ss0.color_blend = 1;
1179
	ss->ss0.color_blend = 1;
1181
 
1180
 
1182
	ss->ss1.base_addr =
1181
	ss->ss1.base_addr =
1183
		kgem_add_reloc(&sna->kgem,
1182
		kgem_add_reloc(&sna->kgem,
1184
			       sna->kgem.surface + 1,
1183
			       sna->kgem.surface + 1,
1185
			       src_bo,
1184
			       src_bo,
1186
			       I915_GEM_DOMAIN_SAMPLER << 16,
1185
			       I915_GEM_DOMAIN_SAMPLER << 16,
1187
			       src_offset);
1186
			       src_offset);
1188
 
1187
 
1189
	ss->ss2.width  = src_width - 1;
1188
	ss->ss2.width  = src_width - 1;
1190
	ss->ss2.height = src_height - 1;
1189
	ss->ss2.height = src_height - 1;
1191
	ss->ss3.pitch  = src_pitch - 1;
1190
	ss->ss3.pitch  = src_pitch - 1;
1192
 
1191
 
1193
	return sna->kgem.surface * sizeof(uint32_t);
1192
	return sna->kgem.surface * sizeof(uint32_t);
1194
}
1193
}
1195
 
1194
 
1196
static void gen5_video_bind_surfaces(struct sna *sna,
1195
static void gen5_video_bind_surfaces(struct sna *sna,
1197
				     const struct sna_composite_op *op)
1196
				     const struct sna_composite_op *op)
1198
{
1197
{
1199
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
1198
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
1200
	struct sna_video_frame *frame = op->priv;
1199
	struct sna_video_frame *frame = op->priv;
1201
	uint32_t src_surf_format;
1200
	uint32_t src_surf_format;
1202
	uint32_t src_surf_base[6];
1201
	uint32_t src_surf_base[6];
1203
	int src_width[6];
1202
	int src_width[6];
1204
	int src_height[6];
1203
	int src_height[6];
1205
	int src_pitch[6];
1204
	int src_pitch[6];
1206
	uint32_t *binding_table;
1205
	uint32_t *binding_table;
1207
	uint16_t offset;
1206
	uint16_t offset;
1208
	int n_src, n;
1207
	int n_src, n;
1209
 
1208
 
1210
	src_surf_base[0] = 0;
1209
	src_surf_base[0] = 0;
1211
	src_surf_base[1] = 0;
1210
	src_surf_base[1] = 0;
1212
	src_surf_base[2] = frame->VBufOffset;
1211
	src_surf_base[2] = frame->VBufOffset;
1213
	src_surf_base[3] = frame->VBufOffset;
1212
	src_surf_base[3] = frame->VBufOffset;
1214
	src_surf_base[4] = frame->UBufOffset;
1213
	src_surf_base[4] = frame->UBufOffset;
1215
	src_surf_base[5] = frame->UBufOffset;
1214
	src_surf_base[5] = frame->UBufOffset;
1216
 
1215
 
1217
	if (is_planar_fourcc(frame->id)) {
1216
	if (is_planar_fourcc(frame->id)) {
1218
		src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
1217
		src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
1219
		src_width[1]  = src_width[0]  = frame->width;
1218
		src_width[1]  = src_width[0]  = frame->width;
1220
		src_height[1] = src_height[0] = frame->height;
1219
		src_height[1] = src_height[0] = frame->height;
1221
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1220
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1222
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1221
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1223
			frame->width / 2;
1222
			frame->width / 2;
1224
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1223
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1225
			frame->height / 2;
1224
			frame->height / 2;
1226
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1225
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1227
			frame->pitch[0];
1226
			frame->pitch[0];
1228
		n_src = 6;
1227
		n_src = 6;
1229
	} else {
1228
	} else {
1230
		if (frame->id == FOURCC_UYVY)
1229
		if (frame->id == FOURCC_UYVY)
1231
			src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
1230
			src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
1232
		else
1231
		else
1233
			src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
1232
			src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
1234
 
1233
 
1235
		src_width[0]  = frame->width;
1234
		src_width[0]  = frame->width;
1236
		src_height[0] = frame->height;
1235
		src_height[0] = frame->height;
1237
		src_pitch[0]  = frame->pitch[0];
1236
		src_pitch[0]  = frame->pitch[0];
1238
		n_src = 1;
1237
		n_src = 1;
1239
	}
1238
	}
1240
 
1239
 
1241
	gen5_get_batch(sna, op);
1240
	gen5_get_batch(sna, op);
1242
 
1241
 
1243
	binding_table = gen5_composite_get_binding_table(sna, &offset);
1242
	binding_table = gen5_composite_get_binding_table(sna, &offset);
1244
	binding_table[0] =
1243
	binding_table[0] =
1245
		gen5_bind_bo(sna,
1244
		gen5_bind_bo(sna,
1246
			     op->dst.bo, op->dst.width, op->dst.height,
1245
			     op->dst.bo, op->dst.width, op->dst.height,
1247
			     gen5_get_dest_format(op->dst.format),
1246
			     gen5_get_dest_format(op->dst.format),
1248
			     true);
1247
			     true);
1249
	for (n = 0; n < n_src; n++) {
1248
	for (n = 0; n < n_src; n++) {
1250
		binding_table[1+n] =
1249
		binding_table[1+n] =
1251
			gen5_bind_video_source(sna,
1250
			gen5_bind_video_source(sna,
1252
					       frame->bo,
1251
					       frame->bo,
1253
					       src_surf_base[n],
1252
					       src_surf_base[n],
1254
					       src_width[n],
1253
					       src_width[n],
1255
					       src_height[n],
1254
					       src_height[n],
1256
					       src_pitch[n],
1255
					       src_pitch[n],
1257
					       src_surf_format);
1256
					       src_surf_format);
1258
	}
1257
	}
1259
 
1258
 
1260
	gen5_emit_state(sna, op, offset | dirty);
1259
	gen5_emit_state(sna, op, offset | dirty);
1261
}
1260
}
1262
 
1261
 
1263
static bool
1262
static bool
1264
gen5_render_video(struct sna *sna,
1263
gen5_render_video(struct sna *sna,
1265
		  struct sna_video *video,
1264
		  struct sna_video *video,
1266
		  struct sna_video_frame *frame,
1265
		  struct sna_video_frame *frame,
1267
		  RegionPtr dstRegion,
1266
		  RegionPtr dstRegion,
1268
		  PixmapPtr pixmap)
1267
		  PixmapPtr pixmap)
1269
{
1268
{
1270
	struct sna_composite_op tmp;
1269
	struct sna_composite_op tmp;
1271
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1270
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1272
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1271
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1273
	int src_width = frame->src.x2 - frame->src.x1;
1272
	int src_width = frame->src.x2 - frame->src.x1;
1274
	int src_height = frame->src.y2 - frame->src.y1;
1273
	int src_height = frame->src.y2 - frame->src.y1;
1275
	float src_offset_x, src_offset_y;
1274
	float src_offset_x, src_offset_y;
1276
	float src_scale_x, src_scale_y;
1275
	float src_scale_x, src_scale_y;
1277
	int nbox, pix_xoff, pix_yoff;
1276
	int nbox, pix_xoff, pix_yoff;
1278
	struct sna_pixmap *priv;
1277
	struct sna_pixmap *priv;
1279
	BoxPtr box;
1278
	BoxPtr box;
1280
 
1279
 
1281
	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
1280
	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
1282
	     src_width, src_height, dst_width, dst_height));
1281
	     src_width, src_height, dst_width, dst_height));
1283
 
1282
 
1284
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1283
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1285
	if (priv == NULL)
1284
	if (priv == NULL)
1286
		return false;
1285
		return false;
1287
 
1286
 
1288
	memset(&tmp, 0, sizeof(tmp));
1287
	memset(&tmp, 0, sizeof(tmp));
1289
 
1288
 
1290
	tmp.op = PictOpSrc;
1289
	tmp.op = PictOpSrc;
1291
	tmp.dst.pixmap = pixmap;
1290
	tmp.dst.pixmap = pixmap;
1292
	tmp.dst.width  = pixmap->drawable.width;
1291
	tmp.dst.width  = pixmap->drawable.width;
1293
	tmp.dst.height = pixmap->drawable.height;
1292
	tmp.dst.height = pixmap->drawable.height;
1294
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
1293
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
1295
	tmp.dst.bo = priv->gpu_bo;
1294
	tmp.dst.bo = priv->gpu_bo;
1296
 
1295
 
1297
	if (src_width == dst_width && src_height == dst_height)
1296
	if (src_width == dst_width && src_height == dst_height)
1298
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
1297
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
1299
	else
1298
	else
1300
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
1299
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
1301
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
1300
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
1302
	tmp.src.bo = frame->bo;
1301
	tmp.src.bo = frame->bo;
1303
	tmp.mask.bo = NULL;
1302
	tmp.mask.bo = NULL;
1304
	tmp.u.gen5.wm_kernel =
1303
	tmp.u.gen5.wm_kernel =
1305
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
1304
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
1306
	tmp.u.gen5.ve_id = 2;
1305
	tmp.u.gen5.ve_id = 2;
1307
	tmp.is_affine = true;
1306
	tmp.is_affine = true;
1308
	tmp.floats_per_vertex = 3;
1307
	tmp.floats_per_vertex = 3;
1309
	tmp.floats_per_rect = 9;
1308
	tmp.floats_per_rect = 9;
1310
	tmp.priv = frame;
1309
	tmp.priv = frame;
1311
 
1310
 
1312
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1311
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1313
		kgem_submit(&sna->kgem);
1312
		kgem_submit(&sna->kgem);
1314
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
1313
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
-
 
1314
			return false;
1315
	}
1315
	}
1316
 
-
 
1317
	gen5_video_bind_surfaces(sna, &tmp);
1316
 
-
 
1317
	gen5_align_vertex(sna, &tmp);
1318
	gen5_align_vertex(sna, &tmp);
1318
	gen5_video_bind_surfaces(sna, &tmp);
1319
 
1319
 
1320
	/* Set up the offset for translating from the given region (in screen
1320
	/* Set up the offset for translating from the given region (in screen
1321
	 * coordinates) to the backing pixmap.
1321
	 * coordinates) to the backing pixmap.
1322
	 */
1322
	 */
1323
#ifdef COMPOSITE
1323
#ifdef COMPOSITE
1324
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1324
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1325
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1325
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1326
#else
1326
#else
1327
	pix_xoff = 0;
1327
	pix_xoff = 0;
1328
	pix_yoff = 0;
1328
	pix_yoff = 0;
1329
#endif
1329
#endif
1330
 
1330
 
1331
	src_scale_x = (float)src_width / dst_width / frame->width;
1331
	src_scale_x = (float)src_width / dst_width / frame->width;
1332
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1332
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1333
 
1333
 
1334
	src_scale_y = (float)src_height / dst_height / frame->height;
1334
	src_scale_y = (float)src_height / dst_height / frame->height;
1335
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1335
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1336
 
1336
 
1337
	box = REGION_RECTS(dstRegion);
1337
	box = REGION_RECTS(dstRegion);
1338
	nbox = REGION_NUM_RECTS(dstRegion);
1338
	nbox = REGION_NUM_RECTS(dstRegion);
1339
	while (nbox--) {
1339
	while (nbox--) {
1340
		BoxRec r;
1340
		BoxRec r;
1341
 
1341
 
1342
		r.x1 = box->x1 + pix_xoff;
1342
		r.x1 = box->x1 + pix_xoff;
1343
		r.x2 = box->x2 + pix_xoff;
1343
		r.x2 = box->x2 + pix_xoff;
1344
		r.y1 = box->y1 + pix_yoff;
1344
		r.y1 = box->y1 + pix_yoff;
1345
		r.y2 = box->y2 + pix_yoff;
1345
		r.y2 = box->y2 + pix_yoff;
1346
 
1346
 
1347
		gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
1347
		gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
1348
 
1348
 
1349
		OUT_VERTEX(r.x2, r.y2);
1349
		OUT_VERTEX(r.x2, r.y2);
1350
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1350
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1351
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1351
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1352
 
1352
 
1353
		OUT_VERTEX(r.x1, r.y2);
1353
		OUT_VERTEX(r.x1, r.y2);
1354
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1354
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1355
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1355
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1356
 
1356
 
1357
		OUT_VERTEX(r.x1, r.y1);
1357
		OUT_VERTEX(r.x1, r.y1);
1358
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1358
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1359
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1359
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1360
 
1360
 
1361
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1361
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1362
			sna_damage_add_box(&priv->gpu_damage, &r);
1362
			sna_damage_add_box(&priv->gpu_damage, &r);
1363
			sna_damage_subtract_box(&priv->cpu_damage, &r);
1363
			sna_damage_subtract_box(&priv->cpu_damage, &r);
1364
		}
1364
		}
1365
		box++;
1365
		box++;
1366
	}
1366
	}
1367
 
1367
 
1368
	gen4_vertex_flush(sna);
1368
	gen4_vertex_flush(sna);
1369
	return true;
1369
	return true;
1370
}
1370
}
1371
#endif
1371
#endif
1372
 
1372
 
1373
static void
1373
static void
1374
gen5_render_composite_done(struct sna *sna,
1374
gen5_render_composite_done(struct sna *sna,
1375
			   const struct sna_composite_op *op)
1375
			   const struct sna_composite_op *op)
1376
{
1376
{
1377
	if (sna->render.vertex_offset) {
1377
	if (sna->render.vertex_offset) {
1378
		gen4_vertex_flush(sna);
1378
		gen4_vertex_flush(sna);
1379
		gen5_magic_ca_pass(sna,op);
1379
		gen5_magic_ca_pass(sna,op);
1380
	}
1380
	}
1381
 
1381
 
1382
	DBG(("%s()\n", __FUNCTION__));
1382
	DBG(("%s()\n", __FUNCTION__));
1383
 
1383
 
1384
}
1384
}
1385
 
1385
 
1386
#if 0
1386
#if 0
1387
static bool
1387
static bool
1388
gen5_composite_set_target(struct sna *sna,
1388
gen5_composite_set_target(struct sna *sna,
1389
			  struct sna_composite_op *op,
1389
			  struct sna_composite_op *op,
1390
			  PicturePtr dst,
1390
			  PicturePtr dst,
1391
			  int x, int y, int w, int h,
1391
			  int x, int y, int w, int h,
1392
			  bool partial)
1392
			  bool partial)
1393
{
1393
{
1394
	BoxRec box;
1394
	BoxRec box;
1395
 
1395
 
1396
	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1396
	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1397
	op->dst.width  = op->dst.pixmap->drawable.width;
1397
	op->dst.width  = op->dst.pixmap->drawable.width;
1398
	op->dst.height = op->dst.pixmap->drawable.height;
1398
	op->dst.height = op->dst.pixmap->drawable.height;
1399
	op->dst.format = dst->format;
1399
	op->dst.format = dst->format;
1400
	if (w && h) {
1400
	if (w && h) {
1401
		box.x1 = x;
1401
		box.x1 = x;
1402
		box.y1 = y;
1402
		box.y1 = y;
1403
		box.x2 = x + w;
1403
		box.x2 = x + w;
1404
		box.y2 = y + h;
1404
		box.y2 = y + h;
1405
	} else
1405
	} else
1406
		sna_render_picture_extents(dst, &box);
1406
		sna_render_picture_extents(dst, &box);
1407
 
1407
 
1408
	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
1408
	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
1409
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
1409
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
1410
					  &box, &op->damage);
1410
					  &box, &op->damage);
1411
	if (op->dst.bo == NULL)
1411
	if (op->dst.bo == NULL)
1412
		return false;
1412
		return false;
1413
 
1413
 
1414
	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1414
	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1415
			    &op->dst.x, &op->dst.y);
1415
			    &op->dst.x, &op->dst.y);
1416
 
1416
 
1417
	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1417
	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1418
	     __FUNCTION__,
1418
	     __FUNCTION__,
1419
	     op->dst.pixmap, (int)op->dst.format,
1419
	     op->dst.pixmap, (int)op->dst.format,
1420
	     op->dst.width, op->dst.height,
1420
	     op->dst.width, op->dst.height,
1421
	     op->dst.bo->pitch,
1421
	     op->dst.bo->pitch,
1422
	     op->dst.x, op->dst.y,
1422
	     op->dst.x, op->dst.y,
1423
	     op->damage ? *op->damage : (void *)-1));
1423
	     op->damage ? *op->damage : (void *)-1));
1424
 
1424
 
1425
	assert(op->dst.bo->proxy == NULL);
1425
	assert(op->dst.bo->proxy == NULL);
1426
 
1426
 
1427
	if (too_large(op->dst.width, op->dst.height) &&
1427
	if (too_large(op->dst.width, op->dst.height) &&
1428
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1428
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1429
		return false;
1429
		return false;
1430
 
1430
 
1431
	return true;
1431
	return true;
1432
}
1432
}
1433
 
1433
 
1434
static bool
1434
static bool
1435
gen5_render_composite(struct sna *sna,
1435
gen5_render_composite(struct sna *sna,
1436
		      uint8_t op,
1436
		      uint8_t op,
1437
		      PicturePtr src,
1437
		      PicturePtr src,
1438
		      PicturePtr mask,
1438
		      PicturePtr mask,
1439
		      PicturePtr dst,
1439
		      PicturePtr dst,
1440
		      int16_t src_x, int16_t src_y,
1440
		      int16_t src_x, int16_t src_y,
1441
		      int16_t msk_x, int16_t msk_y,
1441
		      int16_t msk_x, int16_t msk_y,
1442
		      int16_t dst_x, int16_t dst_y,
1442
		      int16_t dst_x, int16_t dst_y,
1443
		      int16_t width, int16_t height,
1443
		      int16_t width, int16_t height,
1444
		      struct sna_composite_op *tmp)
1444
		      struct sna_composite_op *tmp)
1445
{
1445
{
1446
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
1446
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
1447
	     width, height, sna->kgem.mode));
1447
	     width, height, sna->kgem.mode));
1448
 
1448
 
1449
	if (op >= ARRAY_SIZE(gen5_blend_op)) {
1449
	if (op >= ARRAY_SIZE(gen5_blend_op)) {
1450
		DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
1450
		DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
1451
		return false;
1451
		return false;
1452
	}
1452
	}
1453
 
1453
 
1454
	if (mask == NULL &&
1454
	if (mask == NULL &&
1455
	    try_blt(sna, dst, src, width, height) &&
-
 
1456
	    sna_blt_composite(sna, op,
1455
	    sna_blt_composite(sna, op,
1457
			      src, dst,
1456
			      src, dst,
1458
			      src_x, src_y,
1457
			      src_x, src_y,
1459
			      dst_x, dst_y,
1458
			      dst_x, dst_y,
1460
			      width, height,
1459
			      width, height,
1461
			      tmp, false))
1460
			      tmp, false))
1462
		return true;
1461
		return true;
1463
 
1462
 
1464
	if (gen5_composite_fallback(sna, src, mask, dst))
1463
	if (gen5_composite_fallback(sna, src, mask, dst))
1465
		return false;
1464
		return false;
1466
 
1465
 
1467
	if (need_tiling(sna, width, height))
1466
	if (need_tiling(sna, width, height))
1468
		return sna_tiling_composite(op, src, mask, dst,
1467
		return sna_tiling_composite(op, src, mask, dst,
1469
					    src_x, src_y,
1468
					    src_x, src_y,
1470
					    msk_x, msk_y,
1469
					    msk_x, msk_y,
1471
					    dst_x, dst_y,
1470
					    dst_x, dst_y,
1472
					    width, height,
1471
					    width, height,
1473
					    tmp);
1472
					    tmp);
1474
 
1473
 
1475
	if (!gen5_composite_set_target(sna, tmp, dst,
1474
	if (!gen5_composite_set_target(sna, tmp, dst,
1476
				       dst_x, dst_y, width, height,
1475
				       dst_x, dst_y, width, height,
1477
				       op > PictOpSrc || dst->pCompositeClip->data)) {
1476
				       op > PictOpSrc || dst->pCompositeClip->data)) {
1478
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
1477
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
1479
		return false;
1478
		return false;
1480
	}
1479
	}
1481
 
1480
 
1482
	DBG(("%s: preparing source\n", __FUNCTION__));
1481
	DBG(("%s: preparing source\n", __FUNCTION__));
1483
	tmp->op = op;
1482
	tmp->op = op;
1484
	switch (gen5_composite_picture(sna, src, &tmp->src,
1483
	switch (gen5_composite_picture(sna, src, &tmp->src,
1485
				       src_x, src_y,
1484
				       src_x, src_y,
1486
				       width, height,
1485
				       width, height,
1487
				       dst_x, dst_y,
1486
				       dst_x, dst_y,
1488
				       dst->polyMode == PolyModePrecise)) {
1487
				       dst->polyMode == PolyModePrecise)) {
1489
	case -1:
1488
	case -1:
1490
		DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
1489
		DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
1491
		goto cleanup_dst;
1490
		goto cleanup_dst;
1492
	case 0:
1491
	case 0:
1493
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
1492
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
1494
			goto cleanup_dst;
1493
			goto cleanup_dst;
1495
		/* fall through to fixup */
1494
		/* fall through to fixup */
1496
	case 1:
1495
	case 1:
1497
		if (mask == NULL &&
1496
		if (mask == NULL &&
1498
		    sna_blt_composite__convert(sna,
1497
		    sna_blt_composite__convert(sna,
1499
					       dst_x, dst_y, width, height,
1498
					       dst_x, dst_y, width, height,
1500
					       tmp))
1499
					       tmp))
1501
			return true;
1500
			return true;
1502
 
1501
 
1503
		gen5_composite_channel_convert(&tmp->src);
1502
		gen5_composite_channel_convert(&tmp->src);
1504
		break;
1503
		break;
1505
	}
1504
	}
1506
 
1505
 
1507
	tmp->is_affine = tmp->src.is_affine;
1506
	tmp->is_affine = tmp->src.is_affine;
1508
	tmp->has_component_alpha = false;
1507
	tmp->has_component_alpha = false;
1509
	tmp->need_magic_ca_pass = false;
1508
	tmp->need_magic_ca_pass = false;
1510
 
1509
 
1511
	if (mask) {
1510
	if (mask) {
1512
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
1511
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
1513
			tmp->has_component_alpha = true;
1512
			tmp->has_component_alpha = true;
1514
 
1513
 
1515
			/* Check if it's component alpha that relies on a source alpha and on
1514
			/* Check if it's component alpha that relies on a source alpha and on
1516
			 * the source value.  We can only get one of those into the single
1515
			 * the source value.  We can only get one of those into the single
1517
			 * source value that we get to blend with.
1516
			 * source value that we get to blend with.
1518
			 */
1517
			 */
1519
			if (gen5_blend_op[op].src_alpha &&
1518
			if (gen5_blend_op[op].src_alpha &&
1520
			    (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
1519
			    (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
1521
				if (op != PictOpOver) {
1520
				if (op != PictOpOver) {
1522
					DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
1521
					DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
1523
					goto cleanup_src;
1522
					goto cleanup_src;
1524
				}
1523
				}
1525
 
1524
 
1526
				tmp->need_magic_ca_pass = true;
1525
				tmp->need_magic_ca_pass = true;
1527
				tmp->op = PictOpOutReverse;
1526
				tmp->op = PictOpOutReverse;
1528
			}
1527
			}
1529
		}
1528
		}
1530
 
1529
 
1531
		if (!reuse_source(sna,
1530
		if (!reuse_source(sna,
1532
				  src, &tmp->src, src_x, src_y,
1531
				  src, &tmp->src, src_x, src_y,
1533
				  mask, &tmp->mask, msk_x, msk_y)) {
1532
				  mask, &tmp->mask, msk_x, msk_y)) {
1534
			DBG(("%s: preparing mask\n", __FUNCTION__));
1533
			DBG(("%s: preparing mask\n", __FUNCTION__));
1535
			switch (gen5_composite_picture(sna, mask, &tmp->mask,
1534
			switch (gen5_composite_picture(sna, mask, &tmp->mask,
1536
						       msk_x, msk_y,
1535
						       msk_x, msk_y,
1537
						       width, height,
1536
						       width, height,
1538
						       dst_x, dst_y,
1537
						       dst_x, dst_y,
1539
						       dst->polyMode == PolyModePrecise)) {
1538
						       dst->polyMode == PolyModePrecise)) {
1540
			case -1:
1539
			case -1:
1541
				DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
1540
				DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
1542
				goto cleanup_src;
1541
				goto cleanup_src;
1543
			case 0:
1542
			case 0:
1544
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
1543
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
1545
					goto cleanup_src;
1544
					goto cleanup_src;
1546
				/* fall through to fixup */
1545
				/* fall through to fixup */
1547
			case 1:
1546
			case 1:
1548
				gen5_composite_channel_convert(&tmp->mask);
1547
				gen5_composite_channel_convert(&tmp->mask);
1549
				break;
1548
				break;
1550
			}
1549
			}
1551
		}
1550
		}
1552
 
1551
 
1553
		tmp->is_affine &= tmp->mask.is_affine;
1552
		tmp->is_affine &= tmp->mask.is_affine;
1554
	}
1553
	}
1555
 
1554
 
1556
	tmp->u.gen5.wm_kernel =
1555
	tmp->u.gen5.wm_kernel =
1557
		gen5_choose_composite_kernel(tmp->op,
1556
		gen5_choose_composite_kernel(tmp->op,
1558
					     tmp->mask.bo != NULL,
1557
					     tmp->mask.bo != NULL,
1559
					     tmp->has_component_alpha,
1558
					     tmp->has_component_alpha,
1560
					     tmp->is_affine);
1559
					     tmp->is_affine);
1561
	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
1560
	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
1562
 
1561
 
1563
	tmp->blt   = gen5_render_composite_blt;
1562
	tmp->blt   = gen5_render_composite_blt;
1564
	tmp->box   = gen5_render_composite_box;
1563
	tmp->box   = gen5_render_composite_box;
1565
	tmp->boxes = gen5_render_composite_boxes__blt;
1564
	tmp->boxes = gen5_render_composite_boxes__blt;
1566
	if (tmp->emit_boxes) {
1565
	if (tmp->emit_boxes) {
1567
		tmp->boxes = gen5_render_composite_boxes;
1566
		tmp->boxes = gen5_render_composite_boxes;
1568
		tmp->thread_boxes = gen5_render_composite_boxes__thread;
1567
		tmp->thread_boxes = gen5_render_composite_boxes__thread;
1569
	}
1568
	}
1570
	tmp->done  = gen5_render_composite_done;
1569
	tmp->done  = gen5_render_composite_done;
1571
 
1570
 
1572
	if (!kgem_check_bo(&sna->kgem,
1571
	if (!kgem_check_bo(&sna->kgem,
1573
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
1572
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
1574
		kgem_submit(&sna->kgem);
1573
		kgem_submit(&sna->kgem);
1575
		if (!kgem_check_bo(&sna->kgem,
1574
		if (!kgem_check_bo(&sna->kgem,
1576
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
1575
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
1577
			goto cleanup_mask;
1576
			goto cleanup_mask;
1578
	}
1577
	}
1579
 
-
 
1580
	gen5_bind_surfaces(sna, tmp);
1578
 
-
 
1579
	gen5_align_vertex(sna, tmp);
1581
	gen5_align_vertex(sna, tmp);
1580
	gen5_bind_surfaces(sna, tmp);
1582
	return true;
1581
	return true;
1583
 
1582
 
1584
cleanup_mask:
1583
cleanup_mask:
1585
	if (tmp->mask.bo)
1584
	if (tmp->mask.bo)
1586
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
1585
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
1587
cleanup_src:
1586
cleanup_src:
1588
	if (tmp->src.bo)
1587
	if (tmp->src.bo)
1589
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
1588
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
1590
cleanup_dst:
1589
cleanup_dst:
1591
	if (tmp->redirect.real_bo)
1590
	if (tmp->redirect.real_bo)
1592
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
1591
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
1593
	return false;
1592
	return false;
1594
}
1593
}
1595
 
1594
 
1596
#if !NO_COMPOSITE_SPANS
1595
#if !NO_COMPOSITE_SPANS
1597
fastcall static void
1596
fastcall static void
1598
gen5_render_composite_spans_box(struct sna *sna,
1597
gen5_render_composite_spans_box(struct sna *sna,
1599
				const struct sna_composite_spans_op *op,
1598
				const struct sna_composite_spans_op *op,
1600
				const BoxRec *box, float opacity)
1599
				const BoxRec *box, float opacity)
1601
{
1600
{
1602
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
1601
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
1603
	     __FUNCTION__,
1602
	     __FUNCTION__,
1604
	     op->base.src.offset[0], op->base.src.offset[1],
1603
	     op->base.src.offset[0], op->base.src.offset[1],
1605
	     opacity,
1604
	     opacity,
1606
	     op->base.dst.x, op->base.dst.y,
1605
	     op->base.dst.x, op->base.dst.y,
1607
	     box->x1, box->y1,
1606
	     box->x1, box->y1,
1608
	     box->x2 - box->x1,
1607
	     box->x2 - box->x1,
1609
	     box->y2 - box->y1));
1608
	     box->y2 - box->y1));
1610
 
1609
 
1611
	gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
1610
	gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
1612
	op->prim_emit(sna, op, box, opacity);
1611
	op->prim_emit(sna, op, box, opacity);
1613
}
1612
}
1614
 
1613
 
1615
static void
1614
static void
1616
gen5_render_composite_spans_boxes(struct sna *sna,
1615
gen5_render_composite_spans_boxes(struct sna *sna,
1617
				  const struct sna_composite_spans_op *op,
1616
				  const struct sna_composite_spans_op *op,
1618
				  const BoxRec *box, int nbox,
1617
				  const BoxRec *box, int nbox,
1619
				  float opacity)
1618
				  float opacity)
1620
{
1619
{
1621
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
1620
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
1622
	     __FUNCTION__, nbox,
1621
	     __FUNCTION__, nbox,
1623
	     op->base.src.offset[0], op->base.src.offset[1],
1622
	     op->base.src.offset[0], op->base.src.offset[1],
1624
	     opacity,
1623
	     opacity,
1625
	     op->base.dst.x, op->base.dst.y));
1624
	     op->base.dst.x, op->base.dst.y));
1626
 
1625
 
1627
	do {
1626
	do {
1628
		int nbox_this_time;
1627
		int nbox_this_time;
1629
 
1628
 
1630
		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
1629
		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
1631
						     gen5_bind_surfaces);
1630
						     gen5_bind_surfaces);
1632
		nbox -= nbox_this_time;
1631
		nbox -= nbox_this_time;
1633
 
1632
 
1634
		do {
1633
		do {
1635
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1634
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1636
			     box->x1, box->y1,
1635
			     box->x1, box->y1,
1637
			     box->x2 - box->x1,
1636
			     box->x2 - box->x1,
1638
			     box->y2 - box->y1));
1637
			     box->y2 - box->y1));
1639
 
1638
 
1640
			op->prim_emit(sna, op, box++, opacity);
1639
			op->prim_emit(sna, op, box++, opacity);
1641
		} while (--nbox_this_time);
1640
		} while (--nbox_this_time);
1642
	} while (nbox);
1641
	} while (nbox);
1643
}
1642
}
1644
 
1643
 
1645
fastcall static void
1644
fastcall static void
1646
gen5_render_composite_spans_boxes__thread(struct sna *sna,
1645
gen5_render_composite_spans_boxes__thread(struct sna *sna,
1647
					  const struct sna_composite_spans_op *op,
1646
					  const struct sna_composite_spans_op *op,
1648
					  const struct sna_opacity_box *box,
1647
					  const struct sna_opacity_box *box,
1649
					  int nbox)
1648
					  int nbox)
1650
{
1649
{
1651
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
1650
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
1652
	     __FUNCTION__, nbox,
1651
	     __FUNCTION__, nbox,
1653
	     op->base.src.offset[0], op->base.src.offset[1],
1652
	     op->base.src.offset[0], op->base.src.offset[1],
1654
	     op->base.dst.x, op->base.dst.y));
1653
	     op->base.dst.x, op->base.dst.y));
1655
 
1654
 
1656
	sna_vertex_lock(&sna->render);
1655
	sna_vertex_lock(&sna->render);
1657
	do {
1656
	do {
1658
		int nbox_this_time;
1657
		int nbox_this_time;
1659
		float *v;
1658
		float *v;
1660
 
1659
 
1661
		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
1660
		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
1662
						     gen5_bind_surfaces);
1661
						     gen5_bind_surfaces);
1663
		assert(nbox_this_time);
1662
		assert(nbox_this_time);
1664
		nbox -= nbox_this_time;
1663
		nbox -= nbox_this_time;
1665
 
1664
 
1666
		v = sna->render.vertices + sna->render.vertex_used;
1665
		v = sna->render.vertices + sna->render.vertex_used;
1667
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
1666
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
1668
 
1667
 
1669
		sna_vertex_acquire__locked(&sna->render);
1668
		sna_vertex_acquire__locked(&sna->render);
1670
		sna_vertex_unlock(&sna->render);
1669
		sna_vertex_unlock(&sna->render);
1671
 
1670
 
1672
		op->emit_boxes(op, box, nbox_this_time, v);
1671
		op->emit_boxes(op, box, nbox_this_time, v);
1673
		box += nbox_this_time;
1672
		box += nbox_this_time;
1674
 
1673
 
1675
		sna_vertex_lock(&sna->render);
1674
		sna_vertex_lock(&sna->render);
1676
		sna_vertex_release__locked(&sna->render);
1675
		sna_vertex_release__locked(&sna->render);
1677
	} while (nbox);
1676
	} while (nbox);
1678
	sna_vertex_unlock(&sna->render);
1677
	sna_vertex_unlock(&sna->render);
1679
}
1678
}
1680
 
1679
 
1681
fastcall static void
1680
fastcall static void
1682
gen5_render_composite_spans_done(struct sna *sna,
1681
gen5_render_composite_spans_done(struct sna *sna,
1683
				 const struct sna_composite_spans_op *op)
1682
				 const struct sna_composite_spans_op *op)
1684
{
1683
{
1685
	if (sna->render.vertex_offset)
1684
	if (sna->render.vertex_offset)
1686
		gen4_vertex_flush(sna);
1685
		gen4_vertex_flush(sna);
1687
 
1686
 
1688
	DBG(("%s()\n", __FUNCTION__));
1687
	DBG(("%s()\n", __FUNCTION__));
1689
 
1688
 
1690
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
1689
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
1691
	sna_render_composite_redirect_done(sna, &op->base);
1690
	sna_render_composite_redirect_done(sna, &op->base);
1692
}
1691
}
1693
 
1692
 
1694
static bool
1693
static bool
1695
gen5_check_composite_spans(struct sna *sna,
1694
gen5_check_composite_spans(struct sna *sna,
1696
			   uint8_t op, PicturePtr src, PicturePtr dst,
1695
			   uint8_t op, PicturePtr src, PicturePtr dst,
1697
			   int16_t width, int16_t height,
1696
			   int16_t width, int16_t height,
1698
			   unsigned flags)
1697
			   unsigned flags)
1699
{
1698
{
1700
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
1699
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
1701
	     __FUNCTION__, op, width, height, flags));
1700
	     __FUNCTION__, op, width, height, flags));
1702
 
1701
 
1703
	if (op >= ARRAY_SIZE(gen5_blend_op))
1702
	if (op >= ARRAY_SIZE(gen5_blend_op))
1704
		return false;
1703
		return false;
1705
 
1704
 
1706
	if (gen5_composite_fallback(sna, src, NULL, dst)) {
1705
	if (gen5_composite_fallback(sna, src, NULL, dst)) {
1707
		DBG(("%s: operation would fallback\n", __FUNCTION__));
1706
		DBG(("%s: operation would fallback\n", __FUNCTION__));
1708
		return false;
1707
		return false;
1709
	}
1708
	}
1710
 
1709
 
1711
	if (need_tiling(sna, width, height) &&
1710
	if (need_tiling(sna, width, height) &&
1712
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
1711
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
1713
		DBG(("%s: fallback, tiled operation not on GPU\n",
1712
		DBG(("%s: fallback, tiled operation not on GPU\n",
1714
		     __FUNCTION__));
1713
		     __FUNCTION__));
1715
		return false;
1714
		return false;
1716
	}
1715
	}
1717
 
1716
 
1718
	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
1717
	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
1719
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
1718
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
1720
		assert(priv);
1719
		assert(priv);
1721
 
1720
 
1722
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1721
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1723
			return true;
1722
			return true;
1724
 
1723
 
1725
		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
1724
		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
1726
			return false;
1725
			return false;
1727
 
1726
 
1728
		if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
1727
		if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
1729
		    dst->format == PICT_a8)
1728
		    dst->format == PICT_a8)
1730
			return false;
1729
			return false;
1731
 
1730
 
1732
		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
1731
		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
1733
	}
1732
	}
1734
 
1733
 
1735
	return true;
1734
	return true;
1736
}
1735
}
1737
 
1736
 
1738
static bool
1737
static bool
1739
gen5_render_composite_spans(struct sna *sna,
1738
gen5_render_composite_spans(struct sna *sna,
1740
			    uint8_t op,
1739
			    uint8_t op,
1741
			    PicturePtr src,
1740
			    PicturePtr src,
1742
			    PicturePtr dst,
1741
			    PicturePtr dst,
1743
			    int16_t src_x,  int16_t src_y,
1742
			    int16_t src_x,  int16_t src_y,
1744
			    int16_t dst_x,  int16_t dst_y,
1743
			    int16_t dst_x,  int16_t dst_y,
1745
			    int16_t width,  int16_t height,
1744
			    int16_t width,  int16_t height,
1746
			    unsigned flags,
1745
			    unsigned flags,
1747
			    struct sna_composite_spans_op *tmp)
1746
			    struct sna_composite_spans_op *tmp)
1748
{
1747
{
1749
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
1748
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
1750
	     width, height, flags, sna->kgem.ring));
1749
	     width, height, flags, sna->kgem.ring));
1751
 
1750
 
1752
	assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));
1751
	assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));
1753
 
1752
 
1754
	if (need_tiling(sna, width, height)) {
1753
	if (need_tiling(sna, width, height)) {
1755
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
1754
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
1756
		     __FUNCTION__, width, height));
1755
		     __FUNCTION__, width, height));
1757
		return sna_tiling_composite_spans(op, src, dst,
1756
		return sna_tiling_composite_spans(op, src, dst,
1758
						  src_x, src_y, dst_x, dst_y,
1757
						  src_x, src_y, dst_x, dst_y,
1759
						  width, height, flags, tmp);
1758
						  width, height, flags, tmp);
1760
	}
1759
	}
1761
 
1760
 
1762
	tmp->base.op = op;
1761
	tmp->base.op = op;
1763
	if (!gen5_composite_set_target(sna, &tmp->base, dst,
1762
	if (!gen5_composite_set_target(sna, &tmp->base, dst,
1764
				       dst_x, dst_y, width, height,
1763
				       dst_x, dst_y, width, height,
1765
				       true))
1764
				       true))
1766
		return false;
1765
		return false;
1767
 
1766
 
1768
	switch (gen5_composite_picture(sna, src, &tmp->base.src,
1767
	switch (gen5_composite_picture(sna, src, &tmp->base.src,
1769
				       src_x, src_y,
1768
				       src_x, src_y,
1770
				       width, height,
1769
				       width, height,
1771
				       dst_x, dst_y,
1770
				       dst_x, dst_y,
1772
				       dst->polyMode == PolyModePrecise)) {
1771
				       dst->polyMode == PolyModePrecise)) {
1773
	case -1:
1772
	case -1:
1774
		goto cleanup_dst;
1773
		goto cleanup_dst;
1775
	case 0:
1774
	case 0:
1776
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
1775
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
1777
			goto cleanup_dst;
1776
			goto cleanup_dst;
1778
		/* fall through to fixup */
1777
		/* fall through to fixup */
1779
	case 1:
1778
	case 1:
1780
		gen5_composite_channel_convert(&tmp->base.src);
1779
		gen5_composite_channel_convert(&tmp->base.src);
1781
		break;
1780
		break;
1782
	}
1781
	}
1783
 
1782
 
1784
	tmp->base.mask.bo = NULL;
1783
	tmp->base.mask.bo = NULL;
1785
 
1784
 
1786
	tmp->base.is_affine = tmp->base.src.is_affine;
1785
	tmp->base.is_affine = tmp->base.src.is_affine;
1787
	tmp->base.has_component_alpha = false;
1786
	tmp->base.has_component_alpha = false;
1788
	tmp->base.need_magic_ca_pass = false;
1787
	tmp->base.need_magic_ca_pass = false;
1789
 
1788
 
1790
	tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
1789
	tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
1791
	tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
1790
	tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
1792
 
1791
 
1793
	tmp->box   = gen5_render_composite_spans_box;
1792
	tmp->box   = gen5_render_composite_spans_box;
1794
	tmp->boxes = gen5_render_composite_spans_boxes;
1793
	tmp->boxes = gen5_render_composite_spans_boxes;
1795
	if (tmp->emit_boxes)
1794
	if (tmp->emit_boxes)
1796
		tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
1795
		tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
1797
	tmp->done  = gen5_render_composite_spans_done;
1796
	tmp->done  = gen5_render_composite_spans_done;
1798
 
1797
 
1799
	if (!kgem_check_bo(&sna->kgem,
1798
	if (!kgem_check_bo(&sna->kgem,
1800
			   tmp->base.dst.bo, tmp->base.src.bo,
1799
			   tmp->base.dst.bo, tmp->base.src.bo,
1801
			   NULL))  {
1800
			   NULL))  {
1802
		kgem_submit(&sna->kgem);
1801
		kgem_submit(&sna->kgem);
1803
		if (!kgem_check_bo(&sna->kgem,
1802
		if (!kgem_check_bo(&sna->kgem,
1804
				   tmp->base.dst.bo, tmp->base.src.bo,
1803
				   tmp->base.dst.bo, tmp->base.src.bo,
1805
				   NULL))
1804
				   NULL))
1806
			goto cleanup_src;
1805
			goto cleanup_src;
1807
	}
1806
	}
1808
 
-
 
1809
	gen5_bind_surfaces(sna, &tmp->base);
1807
 
-
 
1808
	gen5_align_vertex(sna, &tmp->base);
1810
	gen5_align_vertex(sna, &tmp->base);
1809
	gen5_bind_surfaces(sna, &tmp->base);
1811
	return true;
1810
	return true;
1812
 
1811
 
1813
cleanup_src:
1812
cleanup_src:
1814
	if (tmp->base.src.bo)
1813
	if (tmp->base.src.bo)
1815
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
1814
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
1816
cleanup_dst:
1815
cleanup_dst:
1817
	if (tmp->base.redirect.real_bo)
1816
	if (tmp->base.redirect.real_bo)
1818
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
1817
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
1819
	return false;
1818
	return false;
1820
}
1819
}
1821
#endif
1820
#endif
1822
 
1821
 
1823
 
1822
 
1824
 
1823
 
1825
static bool
1824
static bool
1826
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
1825
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
1827
		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
1826
		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
1828
		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
1827
		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
1829
		       const BoxRec *box, int n, unsigned flags)
1828
		       const BoxRec *box, int n, unsigned flags)
1830
{
1829
{
1831
	struct sna_composite_op tmp;
1830
	struct sna_composite_op tmp;
1832
 
1831
 
1833
	DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
1832
	DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
1834
	     __FUNCTION__, alu,
1833
	     __FUNCTION__, alu,
1835
	     src->drawable.serialNumber, src_bo->handle,
1834
	     src->drawable.serialNumber, src_bo->handle,
1836
	     dst->drawable.serialNumber, dst_bo->handle,
1835
	     dst->drawable.serialNumber, dst_bo->handle,
1837
	     n, box->x1, box->y1, box->x2, box->y2,
1836
	     n, box->x1, box->y1, box->x2, box->y2,
1838
	     flags));
1837
	     flags));
1839
 
1838
 
1840
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
1839
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
1841
	    sna_blt_copy_boxes(sna, alu,
1840
	    sna_blt_copy_boxes(sna, alu,
1842
			       src_bo, src_dx, src_dy,
1841
			       src_bo, src_dx, src_dy,
1843
			       dst_bo, dst_dx, dst_dy,
1842
			       dst_bo, dst_dx, dst_dy,
1844
			       dst->drawable.bitsPerPixel,
1843
			       dst->drawable.bitsPerPixel,
1845
			       box, n))
1844
			       box, n))
1846
		return true;
1845
		return true;
1847
 
1846
 
1848
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
1847
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
1849
fallback_blt:
1848
fallback_blt:
1850
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
1849
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
1851
			return false;
1850
			return false;
1852
 
1851
 
1853
		return sna_blt_copy_boxes_fallback(sna, alu,
1852
		return sna_blt_copy_boxes_fallback(sna, alu,
1854
						   src, src_bo, src_dx, src_dy,
1853
						   src, src_bo, src_dx, src_dy,
1855
						   dst, dst_bo, dst_dx, dst_dy,
1854
						   dst, dst_bo, dst_dx, dst_dy,
1856
						   box, n);
1855
						   box, n);
1857
	}
1856
	}
1858
 
1857
 
1859
	memset(&tmp, 0, sizeof(tmp));
1858
	memset(&tmp, 0, sizeof(tmp));
1860
 
1859
 
1861
	if (dst->drawable.depth == src->drawable.depth) {
1860
	if (dst->drawable.depth == src->drawable.depth) {
1862
		tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
1861
		tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
1863
		tmp.src.pict_format = tmp.dst.format;
1862
		tmp.src.pict_format = tmp.dst.format;
1864
	} else {
1863
	} else {
1865
		tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
1864
		tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
1866
		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
1865
		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
1867
	}
1866
	}
1868
	if (!gen5_check_format(tmp.src.pict_format)) {
1867
	if (!gen5_check_format(tmp.src.pict_format)) {
1869
		DBG(("%s: unsupported source format, %x, use BLT\n",
1868
		DBG(("%s: unsupported source format, %x, use BLT\n",
1870
		     __FUNCTION__, tmp.src.pict_format));
1869
		     __FUNCTION__, tmp.src.pict_format));
1871
		goto fallback_blt;
1870
		goto fallback_blt;
1872
	}
1871
	}
1873
 
1872
 
1874
	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
1873
	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
1875
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
1874
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
1876
 
1875
 
1877
	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
1876
	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
1878
 
1877
 
1879
	tmp.dst.pixmap = dst;
1878
	tmp.dst.pixmap = dst;
1880
	tmp.dst.width  = dst->drawable.width;
1879
	tmp.dst.width  = dst->drawable.width;
1881
	tmp.dst.height = dst->drawable.height;
1880
	tmp.dst.height = dst->drawable.height;
1882
	tmp.dst.x = tmp.dst.y = 0;
1881
	tmp.dst.x = tmp.dst.y = 0;
1883
	tmp.dst.bo = dst_bo;
1882
	tmp.dst.bo = dst_bo;
1884
	tmp.damage = NULL;
1883
	tmp.damage = NULL;
1885
 
1884
 
1886
	sna_render_composite_redirect_init(&tmp);
1885
	sna_render_composite_redirect_init(&tmp);
1887
	if (too_large(tmp.dst.width, tmp.dst.height)) {
1886
	if (too_large(tmp.dst.width, tmp.dst.height)) {
1888
		BoxRec extents = box[0];
1887
		BoxRec extents = box[0];
1889
		int i;
1888
		int i;
1890
 
1889
 
1891
		for (i = 1; i < n; i++) {
1890
		for (i = 1; i < n; i++) {
1892
			if (box[i].x1 < extents.x1)
1891
			if (box[i].x1 < extents.x1)
1893
				extents.x1 = box[i].x1;
1892
				extents.x1 = box[i].x1;
1894
			if (box[i].y1 < extents.y1)
1893
			if (box[i].y1 < extents.y1)
1895
				extents.y1 = box[i].y1;
1894
				extents.y1 = box[i].y1;
1896
 
1895
 
1897
			if (box[i].x2 > extents.x2)
1896
			if (box[i].x2 > extents.x2)
1898
				extents.x2 = box[i].x2;
1897
				extents.x2 = box[i].x2;
1899
			if (box[i].y2 > extents.y2)
1898
			if (box[i].y2 > extents.y2)
1900
				extents.y2 = box[i].y2;
1899
				extents.y2 = box[i].y2;
1901
		}
1900
		}
1902
		if (!sna_render_composite_redirect(sna, &tmp,
1901
		if (!sna_render_composite_redirect(sna, &tmp,
1903
						   extents.x1 + dst_dx,
1902
						   extents.x1 + dst_dx,
1904
						   extents.y1 + dst_dy,
1903
						   extents.y1 + dst_dy,
1905
						   extents.x2 - extents.x1,
1904
						   extents.x2 - extents.x1,
1906
						   extents.y2 - extents.y1,
1905
						   extents.y2 - extents.y1,
1907
						   n > 1))
1906
						   n > 1))
1908
			goto fallback_tiled;
1907
			goto fallback_tiled;
1909
	}
1908
	}
1910
 
1909
 
1911
	tmp.src.filter = SAMPLER_FILTER_NEAREST;
1910
	tmp.src.filter = SAMPLER_FILTER_NEAREST;
1912
	tmp.src.repeat = SAMPLER_EXTEND_NONE;
1911
	tmp.src.repeat = SAMPLER_EXTEND_NONE;
1913
	tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
1912
	tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
1914
	if (too_large(src->drawable.width, src->drawable.height)) {
1913
	if (too_large(src->drawable.width, src->drawable.height)) {
1915
		BoxRec extents = box[0];
1914
		BoxRec extents = box[0];
1916
		int i;
1915
		int i;
1917
 
1916
 
1918
		for (i = 1; i < n; i++) {
1917
		for (i = 1; i < n; i++) {
1919
			if (box[i].x1 < extents.x1)
1918
			if (box[i].x1 < extents.x1)
1920
				extents.x1 = box[i].x1;
1919
				extents.x1 = box[i].x1;
1921
			if (box[i].y1 < extents.y1)
1920
			if (box[i].y1 < extents.y1)
1922
				extents.y1 = box[i].y1;
1921
				extents.y1 = box[i].y1;
1923
 
1922
 
1924
			if (box[i].x2 > extents.x2)
1923
			if (box[i].x2 > extents.x2)
1925
				extents.x2 = box[i].x2;
1924
				extents.x2 = box[i].x2;
1926
			if (box[i].y2 > extents.y2)
1925
			if (box[i].y2 > extents.y2)
1927
				extents.y2 = box[i].y2;
1926
				extents.y2 = box[i].y2;
1928
		}
1927
		}
1929
 
1928
 
1930
		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
1929
		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
1931
					       extents.x1 + src_dx,
1930
					       extents.x1 + src_dx,
1932
					       extents.y1 + src_dy,
1931
					       extents.y1 + src_dy,
1933
					       extents.x2 - extents.x1,
1932
					       extents.x2 - extents.x1,
1934
					       extents.y2 - extents.y1))
1933
					       extents.y2 - extents.y1))
1935
			goto fallback_tiled_dst;
1934
			goto fallback_tiled_dst;
1936
	} else {
1935
	} else {
1937
		tmp.src.bo = kgem_bo_reference(src_bo);
1936
		tmp.src.bo = kgem_bo_reference(src_bo);
1938
		tmp.src.width  = src->drawable.width;
1937
		tmp.src.width  = src->drawable.width;
1939
		tmp.src.height = src->drawable.height;
1938
		tmp.src.height = src->drawable.height;
1940
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
1939
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
1941
		tmp.src.scale[0] = 1.f/src->drawable.width;
1940
		tmp.src.scale[0] = 1.f/src->drawable.width;
1942
		tmp.src.scale[1] = 1.f/src->drawable.height;
1941
		tmp.src.scale[1] = 1.f/src->drawable.height;
1943
	}
1942
	}
1944
 
1943
 
1945
	tmp.is_affine = true;
1944
	tmp.is_affine = true;
1946
	tmp.floats_per_vertex = 3;
1945
	tmp.floats_per_vertex = 3;
1947
	tmp.floats_per_rect = 9;
1946
	tmp.floats_per_rect = 9;
1948
	tmp.u.gen5.wm_kernel = WM_KERNEL;
1947
	tmp.u.gen5.wm_kernel = WM_KERNEL;
1949
	tmp.u.gen5.ve_id = 2;
1948
	tmp.u.gen5.ve_id = 2;
1950
 
1949
 
1951
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
1950
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
1952
		kgem_submit(&sna->kgem);
1951
		kgem_submit(&sna->kgem);
1953
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
1952
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
1954
			DBG(("%s: aperture check failed\n", __FUNCTION__));
1953
			DBG(("%s: aperture check failed\n", __FUNCTION__));
-
 
1954
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
-
 
1955
			if (tmp.redirect.real_bo)
-
 
1956
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
1955
			goto fallback_tiled_src;
1957
			goto fallback_blt;
1956
		}
1958
		}
1957
	}
1959
	}
1958
 
1960
 
1959
	dst_dx += tmp.dst.x;
1961
	dst_dx += tmp.dst.x;
1960
	dst_dy += tmp.dst.y;
1962
	dst_dy += tmp.dst.y;
1961
	tmp.dst.x = tmp.dst.y = 0;
1963
	tmp.dst.x = tmp.dst.y = 0;
1962
 
1964
 
1963
	src_dx += tmp.src.offset[0];
1965
	src_dx += tmp.src.offset[0];
1964
	src_dy += tmp.src.offset[1];
1966
	src_dy += tmp.src.offset[1];
1965
 
-
 
1966
	gen5_copy_bind_surfaces(sna, &tmp);
1967
 
-
 
1968
	gen5_align_vertex(sna, &tmp);
1967
	gen5_align_vertex(sna, &tmp);
1969
	gen5_copy_bind_surfaces(sna, &tmp);
1968
 
1970
 
1969
	do {
1971
	do {
1970
		int n_this_time;
1972
		int n_this_time;
1971
 
1973
 
1972
		n_this_time = gen5_get_rectangles(sna, &tmp, n,
1974
		n_this_time = gen5_get_rectangles(sna, &tmp, n,
1973
						  gen5_copy_bind_surfaces);
1975
						  gen5_copy_bind_surfaces);
1974
		n -= n_this_time;
1976
		n -= n_this_time;
1975
 
1977
 
1976
		do {
1978
		do {
1977
			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
1979
			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
1978
			     box->x1 + src_dx, box->y1 + src_dy,
1980
			     box->x1 + src_dx, box->y1 + src_dy,
1979
			     box->x1 + dst_dx, box->y1 + dst_dy,
1981
			     box->x1 + dst_dx, box->y1 + dst_dy,
1980
			     box->x2 - box->x1, box->y2 - box->y1));
1982
			     box->x2 - box->x1, box->y2 - box->y1));
1981
			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
1983
			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
1982
			OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
1984
			OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
1983
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
1985
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
1984
 
1986
 
1985
			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
1987
			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
1986
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
1988
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
1987
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
1989
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
1988
 
1990
 
1989
			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
1991
			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
1990
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
1992
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
1991
			OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
1993
			OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
1992
 
1994
 
1993
			box++;
1995
			box++;
1994
		} while (--n_this_time);
1996
		} while (--n_this_time);
1995
	} while (n);
1997
	} while (n);
1996
 
1998
 
1997
	gen4_vertex_flush(sna);
1999
	gen4_vertex_flush(sna);
1998
	sna_render_composite_redirect_done(sna, &tmp);
2000
	sna_render_composite_redirect_done(sna, &tmp);
1999
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2001
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2000
	return true;
2002
	return true;
2001
 
-
 
2002
fallback_tiled_src:
-
 
2003
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2003
 
2004
fallback_tiled_dst:
2004
fallback_tiled_dst:
2005
	if (tmp.redirect.real_bo)
2005
	if (tmp.redirect.real_bo)
2006
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2006
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2007
fallback_tiled:
2007
fallback_tiled:
2008
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2008
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2009
	    sna_blt_copy_boxes(sna, alu,
2009
	    sna_blt_copy_boxes(sna, alu,
2010
			       src_bo, src_dx, src_dy,
2010
			       src_bo, src_dx, src_dy,
2011
			       dst_bo, dst_dx, dst_dy,
2011
			       dst_bo, dst_dx, dst_dy,
2012
			       dst->drawable.bitsPerPixel,
2012
			       dst->drawable.bitsPerPixel,
2013
			       box, n))
2013
			       box, n))
2014
		return true;
2014
		return true;
2015
 
2015
 
2016
	DBG(("%s: tiled fallback\n", __FUNCTION__));
2016
	DBG(("%s: tiled fallback\n", __FUNCTION__));
2017
	return sna_tiling_copy_boxes(sna, alu,
2017
	return sna_tiling_copy_boxes(sna, alu,
2018
				     src, src_bo, src_dx, src_dy,
2018
				     src, src_bo, src_dx, src_dy,
2019
				     dst, dst_bo, dst_dx, dst_dy,
2019
				     dst, dst_bo, dst_dx, dst_dy,
2020
				     box, n);
2020
				     box, n);
2021
}
2021
}
2022
 
2022
 
2023
#endif
2023
#endif
2024
 
-
 
2025
static void
-
 
2026
gen5_render_flush(struct sna *sna)
-
 
2027
{
-
 
2028
	gen4_vertex_close(sna);
-
 
2029
 
-
 
2030
	assert(sna->render.vb_id == 0);
-
 
2031
	assert(sna->render.vertex_offset == 0);
-
 
2032
}
-
 
2033
 
-
 
2034
static void
2024
static void
2035
gen5_render_context_switch(struct kgem *kgem,
2025
gen5_render_context_switch(struct kgem *kgem,
2036
			   int new_mode)
2026
			   int new_mode)
2037
{
2027
{
2038
	if (!kgem->nbatch)
2028
	if (!kgem->nbatch)
2039
		return;
2029
		return;
2040
 
2030
 
2041
	/* WaNonPipelinedStateCommandFlush
2031
	/* WaNonPipelinedStateCommandFlush
2042
	 *
2032
	 *
2043
	 * Ironlake has a limitation that a 3D or Media command can't
2033
	 * Ironlake has a limitation that a 3D or Media command can't
2044
	 * be the first command after a BLT, unless it's
2034
	 * be the first command after a BLT, unless it's
2045
	 * non-pipelined.
2035
	 * non-pipelined.
2046
	 *
2036
	 *
2047
	 * We do this by ensuring that the non-pipelined drawrect
2037
	 * We do this by ensuring that the non-pipelined drawrect
2048
	 * is always emitted first following a switch from BLT.
2038
	 * is always emitted first following a switch from BLT.
2049
	 */
2039
	 */
2050
	if (kgem->mode == KGEM_BLT) {
2040
	if (kgem->mode == KGEM_BLT) {
2051
		struct sna *sna = to_sna_from_kgem(kgem);
2041
		struct sna *sna = to_sna_from_kgem(kgem);
2052
		DBG(("%s: forcing drawrect on next state emission\n",
2042
		DBG(("%s: forcing drawrect on next state emission\n",
2053
		     __FUNCTION__));
2043
		     __FUNCTION__));
2054
		sna->render_state.gen5.drawrect_limit = -1;
2044
		sna->render_state.gen5.drawrect_limit = -1;
2055
	}
2045
	}
2056
 
2046
 
2057
	if (kgem_ring_is_idle(kgem, kgem->ring)) {
2047
	if (kgem_ring_is_idle(kgem, kgem->ring)) {
2058
		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
2048
		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
2059
		_kgem_submit(kgem);
2049
		_kgem_submit(kgem);
2060
	}
2050
	}
2061
}
2051
}
2062
 
-
 
2063
static void
-
 
2064
discard_vbo(struct sna *sna)
-
 
2065
{
-
 
2066
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
-
 
2067
	sna->render.vbo = NULL;
-
 
2068
	sna->render.vertices = sna->render.vertex_data;
-
 
2069
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
-
 
2070
	sna->render.vertex_used = 0;
-
 
2071
	sna->render.vertex_index = 0;
-
 
2072
}
-
 
2073
 
-
 
2074
static void
-
 
2075
gen5_render_retire(struct kgem *kgem)
-
 
2076
{
-
 
2077
	struct sna *sna;
-
 
2078
 
-
 
2079
	sna = container_of(kgem, struct sna, kgem);
-
 
2080
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
-
 
2081
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
-
 
2082
		sna->render.vertex_used = 0;
-
 
2083
		sna->render.vertex_index = 0;
-
 
2084
	}
-
 
2085
}
-
 
2086
 
-
 
2087
static void
-
 
2088
gen5_render_expire(struct kgem *kgem)
-
 
2089
{
-
 
2090
	struct sna *sna;
-
 
2091
 
-
 
2092
	sna = container_of(kgem, struct sna, kgem);
-
 
2093
	if (sna->render.vbo && !sna->render.vertex_used) {
-
 
2094
		DBG(("%s: discarding vbo\n", __FUNCTION__));
-
 
2095
		discard_vbo(sna);
-
 
2096
	}
-
 
2097
}
-
 
2098
 
2052
 
2099
static void gen5_render_reset(struct sna *sna)
2053
static void gen5_render_reset(struct sna *sna)
2100
{
2054
{
2101
	sna->render_state.gen5.needs_invariant = true;
2055
	sna->render_state.gen5.needs_invariant = true;
2102
	sna->render_state.gen5.ve_id = -1;
2056
	sna->render_state.gen5.ve_id = -1;
2103
	sna->render_state.gen5.last_primitive = -1;
2057
	sna->render_state.gen5.last_primitive = -1;
2104
	sna->render_state.gen5.last_pipelined_pointers = 0;
2058
	sna->render_state.gen5.last_pipelined_pointers = 0;
2105
 
2059
 
2106
	sna->render_state.gen5.drawrect_offset = -1;
2060
	sna->render_state.gen5.drawrect_offset = -1;
2107
	sna->render_state.gen5.drawrect_limit = -1;
2061
	sna->render_state.gen5.drawrect_limit = -1;
2108
	sna->render_state.gen5.surface_table = -1;
2062
	sna->render_state.gen5.surface_table = -1;
2109
 
-
 
2110
	if (sna->render.vbo &&
2063
 
2111
	    !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
2064
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
2112
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
2065
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
2113
		discard_vbo(sna);
2066
		discard_vbo(sna);
2114
	}
2067
	}
2115
 
2068
 
2116
	sna->render.vertex_offset = 0;
2069
	sna->render.vertex_offset = 0;
2117
	sna->render.nvertex_reloc = 0;
2070
	sna->render.nvertex_reloc = 0;
2118
	sna->render.vb_id = 0;
2071
	sna->render.vb_id = 0;
2119
}
2072
}
2120
 
2073
 
2121
static void gen5_render_fini(struct sna *sna)
2074
static void gen5_render_fini(struct sna *sna)
2122
{
2075
{
2123
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
2076
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
2124
}
2077
}
2125
 
2078
 
2126
static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
2079
static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
2127
{
2080
{
2128
	struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
2081
	struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
2129
 
2082
 
2130
	/* Set up the vertex shader to be disabled (passthrough) */
2083
	/* Set up the vertex shader to be disabled (passthrough) */
2131
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
2084
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
2132
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
2085
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
2133
	vs->vs6.vs_enable = 0;
2086
	vs->vs6.vs_enable = 0;
2134
	vs->vs6.vert_cache_disable = 1;
2087
	vs->vs6.vert_cache_disable = 1;
2135
 
2088
 
2136
	return sna_static_stream_offsetof(stream, vs);
2089
	return sna_static_stream_offsetof(stream, vs);
2137
}
2090
}
2138
 
2091
 
2139
static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
2092
static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
2140
				     uint32_t kernel)
2093
				     uint32_t kernel)
2141
{
2094
{
2142
	struct gen5_sf_unit_state *sf_state;
2095
	struct gen5_sf_unit_state *sf_state;
2143
 
2096
 
2144
	sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
2097
	sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
2145
 
2098
 
2146
	sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
2099
	sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
2147
	sf_state->thread0.kernel_start_pointer = kernel >> 6;
2100
	sf_state->thread0.kernel_start_pointer = kernel >> 6;
2148
 
2101
 
2149
	sf_state->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
2102
	sf_state->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
2150
	sf_state->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
2103
	sf_state->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
2151
	sf_state->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
2104
	sf_state->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
2152
	/* don't smash vertex header, read start from dw8 */
2105
	/* don't smash vertex header, read start from dw8 */
2153
	sf_state->thread3.urb_entry_read_offset = 1;
2106
	sf_state->thread3.urb_entry_read_offset = 1;
2154
	sf_state->thread3.dispatch_grf_start_reg = 3;
2107
	sf_state->thread3.dispatch_grf_start_reg = 3;
2155
	sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
2108
	sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
2156
	sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
2109
	sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
2157
	sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
2110
	sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
2158
	sf_state->sf5.viewport_transform = false;	/* skip viewport */
2111
	sf_state->sf5.viewport_transform = false;	/* skip viewport */
2159
	sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
2112
	sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
2160
	sf_state->sf6.scissor = 0;
2113
	sf_state->sf6.scissor = 0;
2161
	sf_state->sf7.trifan_pv = 2;
2114
	sf_state->sf7.trifan_pv = 2;
2162
	sf_state->sf6.dest_org_vbias = 0x8;
2115
	sf_state->sf6.dest_org_vbias = 0x8;
2163
	sf_state->sf6.dest_org_hbias = 0x8;
2116
	sf_state->sf6.dest_org_hbias = 0x8;
2164
 
2117
 
2165
	return sna_static_stream_offsetof(stream, sf_state);
2118
	return sna_static_stream_offsetof(stream, sf_state);
2166
}
2119
}
2167
 
2120
 
2168
static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
2121
static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
2169
					  sampler_filter_t src_filter,
2122
					  sampler_filter_t src_filter,
2170
					  sampler_extend_t src_extend,
2123
					  sampler_extend_t src_extend,
2171
					  sampler_filter_t mask_filter,
2124
					  sampler_filter_t mask_filter,
2172
					  sampler_extend_t mask_extend)
2125
					  sampler_extend_t mask_extend)
2173
{
2126
{
2174
	struct gen5_sampler_state *sampler_state;
2127
	struct gen5_sampler_state *sampler_state;
2175
 
2128
 
2176
	sampler_state = sna_static_stream_map(stream,
2129
	sampler_state = sna_static_stream_map(stream,
2177
					      sizeof(struct gen5_sampler_state) * 2,
2130
					      sizeof(struct gen5_sampler_state) * 2,
2178
					      32);
2131
					      32);
2179
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
2132
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
2180
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
2133
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
2181
 
2134
 
2182
	return sna_static_stream_offsetof(stream, sampler_state);
2135
	return sna_static_stream_offsetof(stream, sampler_state);
2183
}
2136
}
2184
 
2137
 
2185
static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
2138
static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
2186
			       bool has_mask,
2139
			       bool has_mask,
2187
			       uint32_t kernel,
2140
			       uint32_t kernel,
2188
			       uint32_t sampler)
2141
			       uint32_t sampler)
2189
{
2142
{
2190
	state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
2143
	state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
2191
	state->thread0.kernel_start_pointer = kernel >> 6;
2144
	state->thread0.kernel_start_pointer = kernel >> 6;
2192
 
2145
 
2193
	state->thread1.single_program_flow = 0;
2146
	state->thread1.single_program_flow = 0;
2194
 
2147
 
2195
	/* scratch space is not used in our kernel */
2148
	/* scratch space is not used in our kernel */
2196
	state->thread2.scratch_space_base_pointer = 0;
2149
	state->thread2.scratch_space_base_pointer = 0;
2197
	state->thread2.per_thread_scratch_space = 0;
2150
	state->thread2.per_thread_scratch_space = 0;
2198
 
2151
 
2199
	state->thread3.const_urb_entry_read_length = 0;
2152
	state->thread3.const_urb_entry_read_length = 0;
2200
	state->thread3.const_urb_entry_read_offset = 0;
2153
	state->thread3.const_urb_entry_read_offset = 0;
2201
 
2154
 
2202
	state->thread3.urb_entry_read_offset = 0;
2155
	state->thread3.urb_entry_read_offset = 0;
2203
	/* wm kernel use urb from 3, see wm_program in compiler module */
2156
	/* wm kernel use urb from 3, see wm_program in compiler module */
2204
	state->thread3.dispatch_grf_start_reg = 3;	/* must match kernel */
2157
	state->thread3.dispatch_grf_start_reg = 3;	/* must match kernel */
2205
 
2158
 
2206
	state->wm4.sampler_count = 0;	/* hardware requirement */
2159
	state->wm4.sampler_count = 0;	/* hardware requirement */
2207
 
2160
 
2208
	state->wm4.sampler_state_pointer = sampler >> 5;
2161
	state->wm4.sampler_state_pointer = sampler >> 5;
2209
	state->wm5.max_threads = PS_MAX_THREADS - 1;
2162
	state->wm5.max_threads = PS_MAX_THREADS - 1;
2210
	state->wm5.transposed_urb_read = 0;
2163
	state->wm5.transposed_urb_read = 0;
2211
	state->wm5.thread_dispatch_enable = 1;
2164
	state->wm5.thread_dispatch_enable = 1;
2212
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
2165
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
2213
	 * start point
2166
	 * start point
2214
	 */
2167
	 */
2215
	state->wm5.enable_16_pix = 1;
2168
	state->wm5.enable_16_pix = 1;
2216
	state->wm5.enable_8_pix = 0;
2169
	state->wm5.enable_8_pix = 0;
2217
	state->wm5.early_depth_test = 1;
2170
	state->wm5.early_depth_test = 1;
2218
 
2171
 
2219
	/* Each pair of attributes (src/mask coords) is two URB entries */
2172
	/* Each pair of attributes (src/mask coords) is two URB entries */
2220
	if (has_mask) {
2173
	if (has_mask) {
2221
		state->thread1.binding_table_entry_count = 3;	/* 2 tex and fb */
2174
		state->thread1.binding_table_entry_count = 3;	/* 2 tex and fb */
2222
		state->thread3.urb_entry_read_length = 4;
2175
		state->thread3.urb_entry_read_length = 4;
2223
	} else {
2176
	} else {
2224
		state->thread1.binding_table_entry_count = 2;	/* 1 tex and fb */
2177
		state->thread1.binding_table_entry_count = 2;	/* 1 tex and fb */
2225
		state->thread3.urb_entry_read_length = 2;
2178
		state->thread3.urb_entry_read_length = 2;
2226
	}
2179
	}
2227
 
2180
 
2228
	/* binding table entry count is only used for prefetching,
2181
	/* binding table entry count is only used for prefetching,
2229
	 * and it has to be set 0 for Ironlake
2182
	 * and it has to be set 0 for Ironlake
2230
	 */
2183
	 */
2231
	state->thread1.binding_table_entry_count = 0;
2184
	state->thread1.binding_table_entry_count = 0;
2232
}
2185
}
2233
 
2186
 
2234
static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
2187
static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
2235
{
2188
{
2236
	uint8_t *ptr, *base;
2189
	uint8_t *ptr, *base;
2237
	int i, j;
2190
	int i, j;
2238
 
2191
 
2239
	base = ptr =
2192
	base = ptr =
2240
		sna_static_stream_map(stream,
2193
		sna_static_stream_map(stream,
2241
				      GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
2194
				      GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
2242
				      64);
2195
				      64);
2243
 
2196
 
2244
	for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
2197
	for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
2245
		for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
2198
		for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
2246
			struct gen5_cc_unit_state *state =
2199
			struct gen5_cc_unit_state *state =
2247
				(struct gen5_cc_unit_state *)ptr;
2200
				(struct gen5_cc_unit_state *)ptr;
2248
 
2201
 
2249
			state->cc3.blend_enable =
2202
			state->cc3.blend_enable =
2250
				!(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);
2203
				!(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);
2251
 
2204
 
2252
			state->cc5.logicop_func = 0xc;	/* COPY */
2205
			state->cc5.logicop_func = 0xc;	/* COPY */
2253
			state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
2206
			state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
2254
 
2207
 
2255
			/* Fill in alpha blend factors same as color, for the future. */
2208
			/* Fill in alpha blend factors same as color, for the future. */
2256
			state->cc5.ia_src_blend_factor = i;
2209
			state->cc5.ia_src_blend_factor = i;
2257
			state->cc5.ia_dest_blend_factor = j;
2210
			state->cc5.ia_dest_blend_factor = j;
2258
 
2211
 
2259
			state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
2212
			state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
2260
			state->cc6.clamp_post_alpha_blend = 1;
2213
			state->cc6.clamp_post_alpha_blend = 1;
2261
			state->cc6.clamp_pre_alpha_blend = 1;
2214
			state->cc6.clamp_pre_alpha_blend = 1;
2262
			state->cc6.src_blend_factor = i;
2215
			state->cc6.src_blend_factor = i;
2263
			state->cc6.dest_blend_factor = j;
2216
			state->cc6.dest_blend_factor = j;
2264
 
2217
 
2265
			ptr += 64;
2218
			ptr += 64;
2266
		}
2219
		}
2267
	}
2220
	}
2268
 
2221
 
2269
	return sna_static_stream_offsetof(stream, base);
2222
	return sna_static_stream_offsetof(stream, base);
2270
}
2223
}
2271
 
2224
 
2272
static bool gen5_render_setup(struct sna *sna)
2225
static bool gen5_render_setup(struct sna *sna)
2273
{
2226
{
2274
	struct gen5_render_state *state = &sna->render_state.gen5;
2227
	struct gen5_render_state *state = &sna->render_state.gen5;
2275
	struct sna_static_stream general;
2228
	struct sna_static_stream general;
2276
	struct gen5_wm_unit_state_padded *wm_state;
2229
	struct gen5_wm_unit_state_padded *wm_state;
2277
	uint32_t sf[2], wm[KERNEL_COUNT];
2230
	uint32_t sf[2], wm[KERNEL_COUNT];
2278
	int i, j, k, l, m;
2231
	int i, j, k, l, m;
2279
 
2232
 
2280
	sna_static_stream_init(&general);
2233
	sna_static_stream_init(&general);
2281
 
2234
 
2282
	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
2235
	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
2283
	 * dumps, you know it points to zero.
2236
	 * dumps, you know it points to zero.
2284
	 */
2237
	 */
2285
	null_create(&general);
2238
	null_create(&general);
2286
 
2239
 
2287
	/* Set up the two SF states (one for blending with a mask, one without) */
2240
	/* Set up the two SF states (one for blending with a mask, one without) */
2288
	sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
2241
	sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
2289
	sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
2242
	sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
2290
 
2243
 
2291
	for (m = 0; m < KERNEL_COUNT; m++) {
2244
	for (m = 0; m < KERNEL_COUNT; m++) {
2292
		if (wm_kernels[m].size) {
2245
		if (wm_kernels[m].size) {
2293
			wm[m] = sna_static_stream_add(&general,
2246
			wm[m] = sna_static_stream_add(&general,
2294
						      wm_kernels[m].data,
2247
						      wm_kernels[m].data,
2295
						      wm_kernels[m].size,
2248
						      wm_kernels[m].size,
2296
						      64);
2249
						      64);
2297
		} else {
2250
		} else {
2298
			wm[m] = sna_static_stream_compile_wm(sna, &general,
2251
			wm[m] = sna_static_stream_compile_wm(sna, &general,
2299
							     wm_kernels[m].data,
2252
							     wm_kernels[m].data,
2300
							     16);
2253
							     16);
2301
		}
2254
		}
2302
		assert(wm[m]);
2255
		assert(wm[m]);
2303
	}
2256
	}
2304
 
2257
 
2305
	state->vs = gen5_create_vs_unit_state(&general);
2258
	state->vs = gen5_create_vs_unit_state(&general);
2306
 
2259
 
2307
	state->sf[0] = gen5_create_sf_state(&general, sf[0]);
2260
	state->sf[0] = gen5_create_sf_state(&general, sf[0]);
2308
	state->sf[1] = gen5_create_sf_state(&general, sf[1]);
2261
	state->sf[1] = gen5_create_sf_state(&general, sf[1]);
2309
 
2262
 
2310
 
2263
 
2311
	/* Set up the WM states: each filter/extend type for source and mask, per
2264
	/* Set up the WM states: each filter/extend type for source and mask, per
2312
	 * kernel.
2265
	 * kernel.
2313
	 */
2266
	 */
2314
	wm_state = sna_static_stream_map(&general,
2267
	wm_state = sna_static_stream_map(&general,
2315
					  sizeof(*wm_state) * KERNEL_COUNT *
2268
					  sizeof(*wm_state) * KERNEL_COUNT *
2316
					  FILTER_COUNT * EXTEND_COUNT *
2269
					  FILTER_COUNT * EXTEND_COUNT *
2317
					  FILTER_COUNT * EXTEND_COUNT,
2270
					  FILTER_COUNT * EXTEND_COUNT,
2318
					  64);
2271
					  64);
2319
	state->wm = sna_static_stream_offsetof(&general, wm_state);
2272
	state->wm = sna_static_stream_offsetof(&general, wm_state);
2320
	for (i = 0; i < FILTER_COUNT; i++) {
2273
	for (i = 0; i < FILTER_COUNT; i++) {
2321
		for (j = 0; j < EXTEND_COUNT; j++) {
2274
		for (j = 0; j < EXTEND_COUNT; j++) {
2322
			for (k = 0; k < FILTER_COUNT; k++) {
2275
			for (k = 0; k < FILTER_COUNT; k++) {
2323
				for (l = 0; l < EXTEND_COUNT; l++) {
2276
				for (l = 0; l < EXTEND_COUNT; l++) {
2324
					uint32_t sampler_state;
2277
					uint32_t sampler_state;
2325
 
2278
 
2326
					sampler_state =
2279
					sampler_state =
2327
						gen5_create_sampler_state(&general,
2280
						gen5_create_sampler_state(&general,
2328
									  i, j,
2281
									  i, j,
2329
									  k, l);
2282
									  k, l);
2330
 
2283
 
2331
					for (m = 0; m < KERNEL_COUNT; m++) {
2284
					for (m = 0; m < KERNEL_COUNT; m++) {
2332
						gen5_init_wm_state(&wm_state->state,
2285
						gen5_init_wm_state(&wm_state->state,
2333
								   wm_kernels[m].has_mask,
2286
								   wm_kernels[m].has_mask,
2334
								   wm[m], sampler_state);
2287
								   wm[m], sampler_state);
2335
						wm_state++;
2288
						wm_state++;
2336
					}
2289
					}
2337
				}
2290
				}
2338
        }
2291
        }
2339
        }
2292
        }
2340
    }
2293
    }
2341
 
2294
 
2342
    state->cc = gen5_create_cc_unit_state(&general);
2295
    state->cc = gen5_create_cc_unit_state(&general);
2343
 
2296
 
2344
    state->general_bo = sna_static_stream_fini(sna, &general);
2297
    state->general_bo = sna_static_stream_fini(sna, &general);
2345
    return state->general_bo != NULL;
2298
    return state->general_bo != NULL;
2346
}
2299
}
2347
 
2300
 
2348
const char *gen5_render_init(struct sna *sna, const char *backend)
2301
const char *gen5_render_init(struct sna *sna, const char *backend)
2349
{
2302
{
2350
	if (!gen5_render_setup(sna))
2303
	if (!gen5_render_setup(sna))
2351
		return backend;
2304
		return backend;
2352
 
2305
 
2353
	sna->kgem.context_switch = gen5_render_context_switch;
2306
	sna->kgem.context_switch = gen5_render_context_switch;
2354
	sna->kgem.retire = gen5_render_retire;
2307
	sna->kgem.retire = gen4_render_retire;
2355
	sna->kgem.expire = gen5_render_expire;
2308
	sna->kgem.expire = gen4_render_expire;
2356
 
2309
 
2357
#if 0
2310
#if 0
2358
#if !NO_COMPOSITE
2311
#if !NO_COMPOSITE
2359
	sna->render.composite = gen5_render_composite;
2312
	sna->render.composite = gen5_render_composite;
2360
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
2313
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
2361
#endif
2314
#endif
2362
#if !NO_COMPOSITE_SPANS
2315
#if !NO_COMPOSITE_SPANS
2363
	sna->render.check_composite_spans = gen5_check_composite_spans;
2316
	sna->render.check_composite_spans = gen5_check_composite_spans;
2364
	sna->render.composite_spans = gen5_render_composite_spans;
2317
	sna->render.composite_spans = gen5_render_composite_spans;
2365
	if (sna->PciInfo->device_id == 0x0044)
2318
	if (intel_get_device_id(sna->scrn) == 0x0044)
2366
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
2319
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
2367
#endif
2320
#endif
2368
	sna->render.video = gen5_render_video;
2321
	sna->render.video = gen5_render_video;
2369
 
2322
 
2370
	sna->render.copy_boxes = gen5_render_copy_boxes;
2323
	sna->render.copy_boxes = gen5_render_copy_boxes;
2371
	sna->render.copy = gen5_render_copy;
2324
	sna->render.copy = gen5_render_copy;
2372
 
2325
 
2373
	sna->render.fill_boxes = gen5_render_fill_boxes;
2326
	sna->render.fill_boxes = gen5_render_fill_boxes;
2374
	sna->render.fill = gen5_render_fill;
2327
	sna->render.fill = gen5_render_fill;
2375
	sna->render.fill_one = gen5_render_fill_one;
2328
	sna->render.fill_one = gen5_render_fill_one;
2376
#endif
2329
#endif
2377
 
2330
 
2378
    sna->render.blit_tex = gen5_blit_tex;
2331
    sna->render.blit_tex = gen5_blit_tex;
2379
    sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
2332
    sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
2380
 
2333
 
2381
	sna->render.flush = gen5_render_flush;
2334
	sna->render.flush = gen4_render_flush;
2382
	sna->render.reset = gen5_render_reset;
2335
	sna->render.reset = gen5_render_reset;
2383
	sna->render.fini = gen5_render_fini;
2336
	sna->render.fini = gen5_render_fini;
2384
 
2337
 
2385
	sna->render.max_3d_size = MAX_3D_SIZE;
2338
	sna->render.max_3d_size = MAX_3D_SIZE;
2386
	sna->render.max_3d_pitch = 1 << 18;
2339
	sna->render.max_3d_pitch = 1 << 18;
2387
	return "Ironlake (gen5)";
2340
	return "Ironlake (gen5)";
2388
};
2341
};
2389
 
2342
 
2390
static bool
2343
static bool
2391
gen5_blit_tex(struct sna *sna,
2344
gen5_blit_tex(struct sna *sna,
2392
              uint8_t op, bool scale,
2345
              uint8_t op, bool scale,
2393
		      PixmapPtr src, struct kgem_bo *src_bo,
2346
		      PixmapPtr src, struct kgem_bo *src_bo,
2394
		      PixmapPtr mask,struct kgem_bo *mask_bo,
2347
		      PixmapPtr mask,struct kgem_bo *mask_bo,
2395
		      PixmapPtr dst, struct kgem_bo *dst_bo,
2348
		      PixmapPtr dst, struct kgem_bo *dst_bo,
2396
              int32_t src_x, int32_t src_y,
2349
              int32_t src_x, int32_t src_y,
2397
              int32_t msk_x, int32_t msk_y,
2350
              int32_t msk_x, int32_t msk_y,
2398
              int32_t dst_x, int32_t dst_y,
2351
              int32_t dst_x, int32_t dst_y,
2399
              int32_t width, int32_t height,
2352
              int32_t width, int32_t height,
2400
              struct sna_composite_op *tmp)
2353
              struct sna_composite_op *tmp)
2401
{
2354
{
2402
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
2355
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
2403
	     width, height, sna->kgem.mode));
2356
	     width, height, sna->kgem.mode));
2404
 
2357
 
2405
    tmp->op = PictOpSrc;
2358
    tmp->op = PictOpSrc;
2406
 
2359
 
2407
    tmp->dst.pixmap = dst;
2360
    tmp->dst.pixmap = dst;
2408
    tmp->dst.bo     = dst_bo;
2361
    tmp->dst.bo     = dst_bo;
2409
    tmp->dst.width  = dst->drawable.width;
2362
    tmp->dst.width  = dst->drawable.width;
2410
    tmp->dst.height = dst->drawable.height;
2363
    tmp->dst.height = dst->drawable.height;
2411
    tmp->dst.format = PICT_x8r8g8b8;
2364
    tmp->dst.format = PICT_x8r8g8b8;
2412
 
2365
 
2413
 
2366
 
2414
	tmp->src.repeat = RepeatNone;
2367
	tmp->src.repeat = RepeatNone;
2415
	tmp->src.filter = PictFilterNearest;
2368
	tmp->src.filter = PictFilterNearest;
2416
    tmp->src.is_affine = true;
2369
    tmp->src.is_affine = true;
2417
 
2370
 
2418
    tmp->src.bo = src_bo;
2371
    tmp->src.bo = src_bo;
2419
	tmp->src.pict_format = PICT_x8r8g8b8;
2372
	tmp->src.pict_format = PICT_x8r8g8b8;
2420
    tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
2373
    tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
2421
    tmp->src.width  = src->drawable.width;
2374
    tmp->src.width  = src->drawable.width;
2422
    tmp->src.height = src->drawable.height;
2375
    tmp->src.height = src->drawable.height;
2423
 
2376
 
2424
 
2377
 
2425
    tmp->is_affine = tmp->src.is_affine;
2378
    tmp->is_affine = tmp->src.is_affine;
2426
	tmp->has_component_alpha = false;
2379
	tmp->has_component_alpha = false;
2427
	tmp->need_magic_ca_pass = false;
2380
	tmp->need_magic_ca_pass = false;
2428
 
2381
 
2429
    tmp->mask.is_affine = true;
2382
    tmp->mask.is_affine = true;
2430
 	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
2383
 	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
2431
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
2384
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
2432
    tmp->mask.bo = mask_bo;
2385
    tmp->mask.bo = mask_bo;
2433
    tmp->mask.pict_format = PIXMAN_a8;
2386
    tmp->mask.pict_format = PIXMAN_a8;
2434
    tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
2387
    tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
2435
    tmp->mask.width  = mask->drawable.width;
2388
    tmp->mask.width  = mask->drawable.width;
2436
    tmp->mask.height = mask->drawable.height;
2389
    tmp->mask.height = mask->drawable.height;
2437
 
2390
 
2438
    if( scale )
2391
    if( scale )
2439
    {
2392
    {
2440
        tmp->src.scale[0] = 1.f/width;
2393
        tmp->src.scale[0] = 1.f/width;
2441
        tmp->src.scale[1] = 1.f/height;
2394
        tmp->src.scale[1] = 1.f/height;
2442
    }
2395
    }
2443
    else
2396
    else
2444
    {
2397
    {
2445
        tmp->src.scale[0] = 1.f/src->drawable.width;
2398
        tmp->src.scale[0] = 1.f/src->drawable.width;
2446
        tmp->src.scale[1] = 1.f/src->drawable.height;
2399
        tmp->src.scale[1] = 1.f/src->drawable.height;
2447
    }
2400
    }
2448
 
2401
 
2449
    tmp->mask.scale[0] = 1.f/mask->drawable.width;
2402
    tmp->mask.scale[0] = 1.f/mask->drawable.width;
2450
    tmp->mask.scale[1] = 1.f/mask->drawable.height;
2403
    tmp->mask.scale[1] = 1.f/mask->drawable.height;
2451
 
2404
 
2452
 
2405
 
2453
    tmp->u.gen5.wm_kernel = WM_KERNEL_MASK;
2406
    tmp->u.gen5.wm_kernel = WM_KERNEL_MASK;
2454
 
2407
 
2455
//       gen5_choose_composite_kernel(tmp->op,
2408
//       gen5_choose_composite_kernel(tmp->op,
2456
//                        tmp->mask.bo != NULL,
2409
//                        tmp->mask.bo != NULL,
2457
//                        tmp->has_component_alpha,
2410
//                        tmp->has_component_alpha,
2458
//                        tmp->is_affine);
2411
//                        tmp->is_affine);
2459
	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
2412
	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
2460
 
2413
 
2461
	tmp->blt   = gen5_render_composite_blt;
2414
	tmp->blt   = gen5_render_composite_blt;
2462
	tmp->done  = gen5_render_composite_done;
2415
	tmp->done  = gen5_render_composite_done;
2463
 
2416
 
2464
	if (!kgem_check_bo(&sna->kgem,
2417
	if (!kgem_check_bo(&sna->kgem,
2465
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
2418
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
2466
		kgem_submit(&sna->kgem);
2419
		kgem_submit(&sna->kgem);
2467
	}
2420
	}
2468
 
-
 
2469
	gen5_bind_surfaces(sna, tmp);
2421
 
2470
	gen5_align_vertex(sna, tmp);
2422
	gen5_align_vertex(sna, tmp);
-
 
2423
	gen5_bind_surfaces(sna, tmp);
2471
	return true;
2424
 
2472
 
2425
	return true;
2473
}
2426
}