Subversion Repositories Kolibri OS

Rev

Rev 4359 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4359 Rev 4501
1
/*
1
/*
2
 * Copyright © 2006,2008,2011 Intel Corporation
2
 * Copyright © 2006,2008,2011 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
11
 *
12
 * The above copyright notice and this permission notice (including the next
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
14
 * Software.
15
 *
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
22
 * SOFTWARE.
23
 *
23
 *
24
 * Authors:
24
 * Authors:
25
 *    Wang Zhenyu 
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
26
 *    Eric Anholt 
27
 *    Carl Worth 
27
 *    Carl Worth 
28
 *    Keith Packard 
28
 *    Keith Packard 
29
 *    Chris Wilson 
29
 *    Chris Wilson 
30
 *
30
 *
31
 */
31
 */
32
 
32
 
33
#ifdef HAVE_CONFIG_H
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
34
#include "config.h"
35
#endif
35
#endif
36
 
36
 
37
#include "sna.h"
37
#include "sna.h"
38
#include "sna_reg.h"
38
#include "sna_reg.h"
39
#include "sna_render.h"
39
#include "sna_render.h"
40
#include "sna_render_inline.h"
40
#include "sna_render_inline.h"
41
//#include "sna_video.h"
41
//#include "sna_video.h"
42
 
42
 
43
#include "brw/brw.h"
43
#include "brw/brw.h"
44
#include "gen7_render.h"
44
#include "gen7_render.h"
-
 
45
#include "gen4_common.h"
45
#include "gen4_source.h"
46
#include "gen4_source.h"
46
#include "gen4_vertex.h"
47
#include "gen4_vertex.h"
-
 
48
#include "gen6_common.h"
-
 
49
 
47
 
50
#define ALWAYS_INVALIDATE 0
-
 
51
#define ALWAYS_FLUSH 0
48
#define ALWAYS_FLUSH 0
52
#define ALWAYS_STALL 0
49
 
53
 
50
#define NO_COMPOSITE 0
54
#define NO_COMPOSITE 0
51
#define NO_COMPOSITE_SPANS 0
55
#define NO_COMPOSITE_SPANS 0
52
#define NO_COPY 0
56
#define NO_COPY 0
53
#define NO_COPY_BOXES 0
57
#define NO_COPY_BOXES 0
54
#define NO_FILL 0
58
#define NO_FILL 0
55
#define NO_FILL_BOXES 0
59
#define NO_FILL_BOXES 0
56
#define NO_FILL_ONE 0
60
#define NO_FILL_ONE 0
57
#define NO_FILL_CLEAR 0
61
#define NO_FILL_CLEAR 0
58
 
62
 
59
#define NO_RING_SWITCH 0
63
#define NO_RING_SWITCH 0
60
 
64
 
61
#define USE_8_PIXEL_DISPATCH 1
65
#define USE_8_PIXEL_DISPATCH 1
62
#define USE_16_PIXEL_DISPATCH 1
66
#define USE_16_PIXEL_DISPATCH 1
63
#define USE_32_PIXEL_DISPATCH 0
67
#define USE_32_PIXEL_DISPATCH 0
64
 
68
 
65
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
69
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
66
#error "Must select at least 8, 16 or 32 pixel dispatch"
70
#error "Must select at least 8, 16 or 32 pixel dispatch"
67
#endif
71
#endif
68
 
72
 
69
#define GEN7_MAX_SIZE 16384
73
#define GEN7_MAX_SIZE 16384
70
 
74
 
71
/* XXX Todo
75
/* XXX Todo
72
 *
76
 *
73
 * STR (software tiled rendering) mode. No, really.
77
 * STR (software tiled rendering) mode. No, really.
74
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
78
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
75
 */
79
 */
76
 
80
 
77
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
81
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
78
 
82
 
79
struct gt_info {
83
struct gt_info {
80
	const char *name;
84
	const char *name;
81
	uint32_t max_vs_threads;
85
	uint32_t max_vs_threads;
82
	uint32_t max_gs_threads;
86
	uint32_t max_gs_threads;
83
	uint32_t max_wm_threads;
87
	uint32_t max_wm_threads;
84
	struct {
88
	struct {
85
		int size;
89
		int size;
86
		int max_vs_entries;
90
		int max_vs_entries;
87
		int max_gs_entries;
91
		int max_gs_entries;
88
		int push_ps_size; /* in 1KBs */
92
		int push_ps_size; /* in 1KBs */
89
	} urb;
93
	} urb;
90
	int gt;
94
	int gt;
91
};
95
};
92
 
96
 
93
static const struct gt_info ivb_gt_info = {
97
static const struct gt_info ivb_gt_info = {
94
	.name = "Ivybridge (gen7)",
98
	.name = "Ivybridge (gen7)",
95
	.max_vs_threads = 16,
99
	.max_vs_threads = 16,
96
	.max_gs_threads = 16,
100
	.max_gs_threads = 16,
97
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
101
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
98
	.urb = { 128, 64, 64, 8 },
102
	.urb = { 128, 64, 64, 8 },
99
	.gt = 0,
103
	.gt = 0,
100
};
104
};
101
 
105
 
102
static const struct gt_info ivb_gt1_info = {
106
static const struct gt_info ivb_gt1_info = {
103
	.name = "Ivybridge (gen7, gt1)",
107
	.name = "Ivybridge (gen7, gt1)",
104
	.max_vs_threads = 36,
108
	.max_vs_threads = 36,
105
	.max_gs_threads = 36,
109
	.max_gs_threads = 36,
106
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
110
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
107
	.urb = { 128, 512, 192, 8 },
111
	.urb = { 128, 512, 192, 8 },
108
	.gt = 1,
112
	.gt = 1,
109
};
113
};
110
 
114
 
111
static const struct gt_info ivb_gt2_info = {
115
static const struct gt_info ivb_gt2_info = {
112
	.name = "Ivybridge (gen7, gt2)",
116
	.name = "Ivybridge (gen7, gt2)",
113
	.max_vs_threads = 128,
117
	.max_vs_threads = 128,
114
	.max_gs_threads = 128,
118
	.max_gs_threads = 128,
115
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
119
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
116
	.urb = { 256, 704, 320, 8 },
120
	.urb = { 256, 704, 320, 8 },
117
	.gt = 2,
121
	.gt = 2,
118
};
122
};
119
 
123
 
120
static const struct gt_info byt_gt_info = {
124
static const struct gt_info byt_gt_info = {
121
	.name = "Baytrail (gen7)",
125
	.name = "Baytrail (gen7)",
122
	.urb = { 128, 64, 64 },
126
	.urb = { 128, 64, 64 },
123
	.max_vs_threads = 36,
127
	.max_vs_threads = 36,
124
	.max_gs_threads = 36,
128
	.max_gs_threads = 36,
125
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
129
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
126
	.urb = { 128, 512, 192, 8 },
130
	.urb = { 128, 512, 192, 8 },
127
	.gt = 1,
131
	.gt = 1,
128
};
132
};
129
 
133
 
130
static const struct gt_info hsw_gt_info = {
134
static const struct gt_info hsw_gt_info = {
131
	.name = "Haswell (gen7.5)",
135
	.name = "Haswell (gen7.5)",
132
	.max_vs_threads = 8,
136
	.max_vs_threads = 8,
133
	.max_gs_threads = 8,
137
	.max_gs_threads = 8,
134
	.max_wm_threads =
138
	.max_wm_threads =
135
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
139
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
136
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
140
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
137
	.urb = { 128, 64, 64, 8 },
141
	.urb = { 128, 64, 64, 8 },
138
	.gt = 0,
142
	.gt = 0,
139
};
143
};
140
 
144
 
141
static const struct gt_info hsw_gt1_info = {
145
static const struct gt_info hsw_gt1_info = {
142
	.name = "Haswell (gen7.5, gt1)",
146
	.name = "Haswell (gen7.5, gt1)",
143
	.max_vs_threads = 70,
147
	.max_vs_threads = 70,
144
	.max_gs_threads = 70,
148
	.max_gs_threads = 70,
145
	.max_wm_threads =
149
	.max_wm_threads =
146
		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
150
		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
147
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
151
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
148
	.urb = { 128, 640, 256, 8 },
152
	.urb = { 128, 640, 256, 8 },
149
	.gt = 1,
153
	.gt = 1,
150
};
154
};
151
 
155
 
152
static const struct gt_info hsw_gt2_info = {
156
static const struct gt_info hsw_gt2_info = {
153
	.name = "Haswell (gen7.5, gt2)",
157
	.name = "Haswell (gen7.5, gt2)",
154
	.max_vs_threads = 140,
158
	.max_vs_threads = 140,
155
	.max_gs_threads = 140,
159
	.max_gs_threads = 140,
156
	.max_wm_threads =
160
	.max_wm_threads =
157
		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
161
		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
158
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
162
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
159
	.urb = { 256, 1664, 640, 8 },
163
	.urb = { 256, 1664, 640, 8 },
160
	.gt = 2,
164
	.gt = 2,
161
};
165
};
162
 
166
 
163
static const struct gt_info hsw_gt3_info = {
167
static const struct gt_info hsw_gt3_info = {
164
	.name = "Haswell (gen7.5, gt3)",
168
	.name = "Haswell (gen7.5, gt3)",
165
	.max_vs_threads = 280,
169
	.max_vs_threads = 280,
166
	.max_gs_threads = 280,
170
	.max_gs_threads = 280,
167
	.max_wm_threads =
171
	.max_wm_threads =
168
		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
172
		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
169
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
173
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
170
	.urb = { 512, 3328, 1280, 16 },
174
	.urb = { 512, 3328, 1280, 16 },
171
	.gt = 3,
175
	.gt = 3,
172
};
176
};
173
 
177
 
174
inline static bool is_ivb(struct sna *sna)
178
inline static bool is_ivb(struct sna *sna)
175
{
179
{
176
	return sna->kgem.gen == 070;
180
	return sna->kgem.gen == 070;
177
}
181
}
178
 
182
 
179
inline static bool is_byt(struct sna *sna)
183
inline static bool is_byt(struct sna *sna)
180
{
184
{
181
	return sna->kgem.gen == 071;
185
	return sna->kgem.gen == 071;
182
}
186
}
183
 
187
 
184
inline static bool is_hsw(struct sna *sna)
188
inline static bool is_hsw(struct sna *sna)
185
{
189
{
186
	return sna->kgem.gen == 075;
190
	return sna->kgem.gen == 075;
187
}
191
}
188
 
192
 
189
static const uint32_t ps_kernel_packed[][4] = {
193
static const uint32_t ps_kernel_packed[][4] = {
190
#include "exa_wm_src_affine.g7b"
194
#include "exa_wm_src_affine.g7b"
191
#include "exa_wm_src_sample_argb.g7b"
195
#include "exa_wm_src_sample_argb.g7b"
192
#include "exa_wm_yuv_rgb.g7b"
196
#include "exa_wm_yuv_rgb.g7b"
193
#include "exa_wm_write.g7b"
197
#include "exa_wm_write.g7b"
194
};
198
};
195
 
199
 
196
static const uint32_t ps_kernel_planar[][4] = {
200
static const uint32_t ps_kernel_planar[][4] = {
197
#include "exa_wm_src_affine.g7b"
201
#include "exa_wm_src_affine.g7b"
198
#include "exa_wm_src_sample_planar.g7b"
202
#include "exa_wm_src_sample_planar.g7b"
199
#include "exa_wm_yuv_rgb.g7b"
203
#include "exa_wm_yuv_rgb.g7b"
200
#include "exa_wm_write.g7b"
204
#include "exa_wm_write.g7b"
201
};
205
};
202
 
206
 
203
#define KERNEL(kernel_enum, kernel, num_surfaces) \
207
#define KERNEL(kernel_enum, kernel, num_surfaces) \
204
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
208
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
205
#define NOKERNEL(kernel_enum, func, num_surfaces) \
209
#define NOKERNEL(kernel_enum, func, num_surfaces) \
206
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
210
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
207
static const struct wm_kernel_info {
211
static const struct wm_kernel_info {
208
	const char *name;
212
	const char *name;
209
	const void *data;
213
	const void *data;
210
	unsigned int size;
214
	unsigned int size;
211
	int num_surfaces;
215
	int num_surfaces;
212
} wm_kernels[] = {
216
} wm_kernels[] = {
213
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
217
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
214
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
218
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
215
 
219
 
216
	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
220
	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
217
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
221
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
218
 
222
 
219
	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
223
	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
220
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
224
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
221
 
225
 
222
	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
226
	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
223
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
227
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
224
 
228
 
225
	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
229
	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
226
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
230
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
227
 
231
 
228
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
232
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
229
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
233
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
230
};
234
};
231
#undef KERNEL
235
#undef KERNEL
232
 
236
 
233
static const struct blendinfo {
237
static const struct blendinfo {
234
	bool src_alpha;
238
	bool src_alpha;
235
	uint32_t src_blend;
239
	uint32_t src_blend;
236
	uint32_t dst_blend;
240
	uint32_t dst_blend;
237
} gen7_blend_op[] = {
241
} gen7_blend_op[] = {
238
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
242
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
239
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
243
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
240
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
244
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
241
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
245
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
242
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
246
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
243
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
247
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
244
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
248
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
245
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
249
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
246
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
250
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
247
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
251
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
248
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
252
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
249
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
253
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
250
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
254
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
251
};
255
};
252
 
256
 
253
/**
257
/**
254
 * Highest-valued BLENDFACTOR used in gen7_blend_op.
258
 * Highest-valued BLENDFACTOR used in gen7_blend_op.
255
 *
259
 *
256
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
260
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
257
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
261
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
258
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
262
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
259
 */
263
 */
260
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)
264
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)
261
 
265
 
262
#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)
266
#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)
263
 
267
 
264
#define BLEND_OFFSET(s, d) \
268
#define BLEND_OFFSET(s, d) \
265
	((d != GEN7_BLENDFACTOR_ZERO) << 15 | \
269
	((d != GEN7_BLENDFACTOR_ZERO) << 15 | \
266
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))
270
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))
267
 
271
 
268
#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
272
#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
269
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)
273
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)
270
 
274
 
271
#define SAMPLER_OFFSET(sf, se, mf, me) \
275
#define SAMPLER_OFFSET(sf, se, mf, me) \
272
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))
276
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))
273
 
277
 
274
#define VERTEX_2s2s 0
278
#define VERTEX_2s2s 0
275
 
279
 
276
#define COPY_SAMPLER 0
280
#define COPY_SAMPLER 0
277
#define COPY_VERTEX VERTEX_2s2s
281
#define COPY_VERTEX VERTEX_2s2s
278
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)
282
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)
279
 
283
 
280
#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
284
#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
281
#define FILL_VERTEX VERTEX_2s2s
285
#define FILL_VERTEX VERTEX_2s2s
282
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
286
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
283
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
287
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
284
 
288
 
285
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
289
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
286
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
290
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
287
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
291
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
288
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
292
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
289
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
293
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
290
#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
294
#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
291
 
295
 
292
#define OUT_BATCH(v) batch_emit(sna, v)
296
#define OUT_BATCH(v) batch_emit(sna, v)
293
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
297
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
294
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
298
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
295
 
299
 
296
static inline bool too_large(int width, int height)
300
static inline bool too_large(int width, int height)
297
{
301
{
298
	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
302
	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
299
}
303
}
300
 
304
 
301
static uint32_t gen7_get_blend(int op,
305
static uint32_t gen7_get_blend(int op,
302
			       bool has_component_alpha,
306
			       bool has_component_alpha,
303
			       uint32_t dst_format)
307
			       uint32_t dst_format)
304
{
308
{
305
	uint32_t src, dst;
309
	uint32_t src, dst;
306
 
310
 
307
 
311
 
308
    src = GEN7_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
312
    src = GEN7_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
309
    dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
313
    dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
310
 
314
 
311
 
315
 
312
#if 0
316
#if 0
313
	/* If there's no dst alpha channel, adjust the blend op so that
317
	/* If there's no dst alpha channel, adjust the blend op so that
314
	 * we'll treat it always as 1.
318
	 * we'll treat it always as 1.
315
	 */
319
	 */
316
	if (PICT_FORMAT_A(dst_format) == 0) {
320
	if (PICT_FORMAT_A(dst_format) == 0) {
317
		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
321
		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
318
			src = GEN7_BLENDFACTOR_ONE;
322
			src = GEN7_BLENDFACTOR_ONE;
319
		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
323
		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
320
			src = GEN7_BLENDFACTOR_ZERO;
324
			src = GEN7_BLENDFACTOR_ZERO;
321
	}
325
	}
322
 
326
 
323
	/* If the source alpha is being used, then we should only be in a
327
	/* If the source alpha is being used, then we should only be in a
324
	 * case where the source blend factor is 0, and the source blend
328
	 * case where the source blend factor is 0, and the source blend
325
	 * value is the mask channels multiplied by the source picture's alpha.
329
	 * value is the mask channels multiplied by the source picture's alpha.
326
	 */
330
	 */
327
	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
331
	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
328
		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
332
		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
329
			dst = GEN7_BLENDFACTOR_SRC_COLOR;
333
			dst = GEN7_BLENDFACTOR_SRC_COLOR;
330
		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
334
		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
331
			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
335
			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
332
	}
336
	}
333
#endif
337
#endif
334
 
338
 
335
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
339
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
336
	     op, dst_format, PICT_FORMAT_A(dst_format),
340
	     op, dst_format, PICT_FORMAT_A(dst_format),
337
	     src, dst, (int)BLEND_OFFSET(src, dst)));
341
	     src, dst, (int)BLEND_OFFSET(src, dst)));
338
	return BLEND_OFFSET(src, dst);
342
	return BLEND_OFFSET(src, dst);
339
}
343
}
340
 
344
 
341
static uint32_t gen7_get_card_format(PictFormat format)
345
static uint32_t gen7_get_card_format(PictFormat format)
342
{
346
{
343
	switch (format) {
347
	switch (format) {
344
	default:
348
	default:
345
		return -1;
349
		return -1;
346
	case PICT_a8r8g8b8:
350
	case PICT_a8r8g8b8:
347
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
351
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
348
	case PICT_x8r8g8b8:
352
	case PICT_x8r8g8b8:
349
		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
353
		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
350
	case PICT_a8b8g8r8:
354
	case PICT_a8b8g8r8:
351
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
355
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
352
	case PICT_x8b8g8r8:
356
	case PICT_x8b8g8r8:
353
		return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
357
		return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
354
	case PICT_a2r10g10b10:
358
	case PICT_a2r10g10b10:
355
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
359
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
356
	case PICT_x2r10g10b10:
360
	case PICT_x2r10g10b10:
357
		return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
361
		return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
358
	case PICT_r8g8b8:
362
	case PICT_r8g8b8:
359
		return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
363
		return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
360
	case PICT_r5g6b5:
364
	case PICT_r5g6b5:
361
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
365
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
362
	case PICT_a1r5g5b5:
366
	case PICT_a1r5g5b5:
363
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
367
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
364
	case PICT_a8:
368
	case PICT_a8:
365
		return GEN7_SURFACEFORMAT_A8_UNORM;
369
		return GEN7_SURFACEFORMAT_A8_UNORM;
366
	case PICT_a4r4g4b4:
370
	case PICT_a4r4g4b4:
367
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
371
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
368
	}
372
	}
369
}
373
}
370
 
374
 
371
static uint32_t gen7_get_dest_format(PictFormat format)
375
static uint32_t gen7_get_dest_format(PictFormat format)
372
{
376
{
373
	switch (format) {
377
	switch (format) {
374
	default:
378
	default:
375
		return -1;
379
		return -1;
376
	case PICT_a8r8g8b8:
380
	case PICT_a8r8g8b8:
377
	case PICT_x8r8g8b8:
381
	case PICT_x8r8g8b8:
378
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
382
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
379
	case PICT_a8b8g8r8:
383
	case PICT_a8b8g8r8:
380
	case PICT_x8b8g8r8:
384
	case PICT_x8b8g8r8:
381
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
385
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
382
	case PICT_a2r10g10b10:
386
	case PICT_a2r10g10b10:
383
	case PICT_x2r10g10b10:
387
	case PICT_x2r10g10b10:
384
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
388
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
385
	case PICT_r5g6b5:
389
	case PICT_r5g6b5:
386
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
390
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
387
	case PICT_x1r5g5b5:
391
	case PICT_x1r5g5b5:
388
	case PICT_a1r5g5b5:
392
	case PICT_a1r5g5b5:
389
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
393
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
390
	case PICT_a8:
394
	case PICT_a8:
391
		return GEN7_SURFACEFORMAT_A8_UNORM;
395
		return GEN7_SURFACEFORMAT_A8_UNORM;
392
	case PICT_a4r4g4b4:
396
	case PICT_a4r4g4b4:
393
	case PICT_x4r4g4b4:
397
	case PICT_x4r4g4b4:
394
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
398
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
395
	}
399
	}
396
}
400
}
397
 
401
 
398
static int
402
static int
399
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
403
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
400
{
404
{
401
	int base;
405
	int base;
402
 
406
 
403
	if (has_mask) {
407
	if (has_mask) {
404
		if (is_ca) {
408
		if (is_ca) {
405
			if (gen7_blend_op[op].src_alpha)
409
			if (gen7_blend_op[op].src_alpha)
406
				base = GEN7_WM_KERNEL_MASKSA;
410
				base = GEN7_WM_KERNEL_MASKSA;
407
			else
411
			else
408
				base = GEN7_WM_KERNEL_MASKCA;
412
				base = GEN7_WM_KERNEL_MASKCA;
409
		} else
413
		} else
410
			base = GEN7_WM_KERNEL_MASK;
414
			base = GEN7_WM_KERNEL_MASK;
411
	} else
415
	} else
412
		base = GEN7_WM_KERNEL_NOMASK;
416
		base = GEN7_WM_KERNEL_NOMASK;
413
 
417
 
414
	return base + !is_affine;
418
	return base + !is_affine;
415
}
419
}
416
 
420
 
417
static void
421
static void
418
gen7_emit_urb(struct sna *sna)
422
gen7_emit_urb(struct sna *sna)
419
{
423
{
420
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
424
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
421
	OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
425
	OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
422
 
426
 
423
	/* num of VS entries must be divisible by 8 if size < 9 */
427
	/* num of VS entries must be divisible by 8 if size < 9 */
424
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
428
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
425
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
429
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
426
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
430
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
427
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
431
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
428
 
432
 
429
	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
433
	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
430
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
434
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
431
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
435
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
432
 
436
 
433
	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
437
	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
434
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
438
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
435
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
439
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
436
 
440
 
437
	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
441
	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
438
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
442
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
439
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
443
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
440
}
444
}
441
 
445
 
442
static void
446
static void
443
gen7_emit_state_base_address(struct sna *sna)
447
gen7_emit_state_base_address(struct sna *sna)
444
{
448
{
445
	uint32_t mocs;
449
	uint32_t mocs;
446
 
450
 
447
	mocs = is_hsw(sna) ? 5 << 8 : 3 << 8;
451
	mocs = is_hsw(sna) ? 5 << 8 : 3 << 8;
448
 
452
 
449
	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
453
	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
450
	OUT_BATCH(0); /* general */
454
	OUT_BATCH(0); /* general */
451
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
455
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
452
				 sna->kgem.nbatch,
456
				 sna->kgem.nbatch,
453
				 NULL,
457
				 NULL,
454
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
458
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
455
				 BASE_ADDRESS_MODIFY));
459
				 BASE_ADDRESS_MODIFY));
456
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
460
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
457
				 sna->kgem.nbatch,
461
				 sna->kgem.nbatch,
458
				 sna->render_state.gen7.general_bo,
462
				 sna->render_state.gen7.general_bo,
459
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
463
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
460
				 mocs | BASE_ADDRESS_MODIFY));
464
				 mocs | BASE_ADDRESS_MODIFY));
461
	OUT_BATCH(0); /* indirect */
465
	OUT_BATCH(0); /* indirect */
462
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
466
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
463
				 sna->kgem.nbatch,
467
				 sna->kgem.nbatch,
464
				 sna->render_state.gen7.general_bo,
468
				 sna->render_state.gen7.general_bo,
465
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
469
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
466
				 mocs | BASE_ADDRESS_MODIFY));
470
				 mocs | BASE_ADDRESS_MODIFY));
467
 
471
 
468
	/* upper bounds, disable */
472
	/* upper bounds, disable */
469
	OUT_BATCH(0);
473
	OUT_BATCH(0);
470
	OUT_BATCH(BASE_ADDRESS_MODIFY);
474
	OUT_BATCH(BASE_ADDRESS_MODIFY);
471
	OUT_BATCH(0);
475
	OUT_BATCH(0);
472
	OUT_BATCH(BASE_ADDRESS_MODIFY);
476
	OUT_BATCH(BASE_ADDRESS_MODIFY);
473
}
477
}
474
 
478
 
475
static void
479
static void
476
gen7_disable_vs(struct sna *sna)
480
gen7_disable_vs(struct sna *sna)
477
{
481
{
478
	/* For future reference:
482
	/* For future reference:
479
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
483
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
480
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
484
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
481
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
485
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
482
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
486
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
483
	 *
487
	 *
484
	 * Here we saved by the full-flush incurred when emitting
488
	 * Here we saved by the full-flush incurred when emitting
485
	 * the batchbuffer.
489
	 * the batchbuffer.
486
	 */
490
	 */
487
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
491
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
488
	OUT_BATCH(0); /* no VS kernel */
492
	OUT_BATCH(0); /* no VS kernel */
489
	OUT_BATCH(0);
493
	OUT_BATCH(0);
490
	OUT_BATCH(0);
494
	OUT_BATCH(0);
491
	OUT_BATCH(0);
495
	OUT_BATCH(0);
492
	OUT_BATCH(0); /* pass-through */
496
	OUT_BATCH(0); /* pass-through */
493
 
497
 
494
#if 0
498
#if 0
495
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
499
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
496
	OUT_BATCH(0);
500
	OUT_BATCH(0);
497
	OUT_BATCH(0);
501
	OUT_BATCH(0);
498
	OUT_BATCH(0);
502
	OUT_BATCH(0);
499
	OUT_BATCH(0);
503
	OUT_BATCH(0);
500
	OUT_BATCH(0);
504
	OUT_BATCH(0);
501
	OUT_BATCH(0);
505
	OUT_BATCH(0);
502
 
506
 
503
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
507
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
504
	OUT_BATCH(0);
508
	OUT_BATCH(0);
505
 
509
 
506
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
510
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
507
	OUT_BATCH(0);
511
	OUT_BATCH(0);
508
#endif
512
#endif
509
}
513
}
510
 
514
 
511
static void
515
static void
512
gen7_disable_hs(struct sna *sna)
516
gen7_disable_hs(struct sna *sna)
513
{
517
{
514
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
518
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
515
	OUT_BATCH(0); /* no HS kernel */
519
	OUT_BATCH(0); /* no HS kernel */
516
	OUT_BATCH(0);
520
	OUT_BATCH(0);
517
	OUT_BATCH(0);
521
	OUT_BATCH(0);
518
	OUT_BATCH(0);
522
	OUT_BATCH(0);
519
	OUT_BATCH(0);
523
	OUT_BATCH(0);
520
	OUT_BATCH(0); /* pass-through */
524
	OUT_BATCH(0); /* pass-through */
521
 
525
 
522
#if 0
526
#if 0
523
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
527
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
524
	OUT_BATCH(0);
528
	OUT_BATCH(0);
525
	OUT_BATCH(0);
529
	OUT_BATCH(0);
526
	OUT_BATCH(0);
530
	OUT_BATCH(0);
527
	OUT_BATCH(0);
531
	OUT_BATCH(0);
528
	OUT_BATCH(0);
532
	OUT_BATCH(0);
529
	OUT_BATCH(0);
533
	OUT_BATCH(0);
530
 
534
 
531
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
535
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
532
	OUT_BATCH(0);
536
	OUT_BATCH(0);
533
 
537
 
534
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
538
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
535
	OUT_BATCH(0);
539
	OUT_BATCH(0);
536
#endif
540
#endif
537
}
541
}
538
 
542
 
539
static void
543
static void
540
gen7_disable_te(struct sna *sna)
544
gen7_disable_te(struct sna *sna)
541
{
545
{
542
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
546
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
543
	OUT_BATCH(0);
547
	OUT_BATCH(0);
544
	OUT_BATCH(0);
548
	OUT_BATCH(0);
545
	OUT_BATCH(0);
549
	OUT_BATCH(0);
546
}
550
}
547
 
551
 
548
static void
552
static void
549
gen7_disable_ds(struct sna *sna)
553
gen7_disable_ds(struct sna *sna)
550
{
554
{
551
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
555
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
552
	OUT_BATCH(0);
556
	OUT_BATCH(0);
553
	OUT_BATCH(0);
557
	OUT_BATCH(0);
554
	OUT_BATCH(0);
558
	OUT_BATCH(0);
555
	OUT_BATCH(0);
559
	OUT_BATCH(0);
556
	OUT_BATCH(0);
560
	OUT_BATCH(0);
557
 
561
 
558
#if 0
562
#if 0
559
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
563
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
560
	OUT_BATCH(0);
564
	OUT_BATCH(0);
561
	OUT_BATCH(0);
565
	OUT_BATCH(0);
562
	OUT_BATCH(0);
566
	OUT_BATCH(0);
563
	OUT_BATCH(0);
567
	OUT_BATCH(0);
564
	OUT_BATCH(0);
568
	OUT_BATCH(0);
565
	OUT_BATCH(0);
569
	OUT_BATCH(0);
566
 
570
 
567
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
571
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
568
	OUT_BATCH(0);
572
	OUT_BATCH(0);
569
 
573
 
570
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
574
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
571
	OUT_BATCH(0);
575
	OUT_BATCH(0);
572
#endif
576
#endif
573
}
577
}
574
 
578
 
575
static void
579
static void
576
gen7_disable_gs(struct sna *sna)
580
gen7_disable_gs(struct sna *sna)
577
{
581
{
578
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
582
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
579
	OUT_BATCH(0); /* no GS kernel */
583
	OUT_BATCH(0); /* no GS kernel */
580
	OUT_BATCH(0);
584
	OUT_BATCH(0);
581
	OUT_BATCH(0);
585
	OUT_BATCH(0);
582
	OUT_BATCH(0);
586
	OUT_BATCH(0);
583
	OUT_BATCH(0);
587
	OUT_BATCH(0);
584
	OUT_BATCH(0); /* pass-through */
588
	OUT_BATCH(0); /* pass-through */
585
 
589
 
586
#if 0
590
#if 0
587
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
591
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
588
	OUT_BATCH(0);
592
	OUT_BATCH(0);
589
	OUT_BATCH(0);
593
	OUT_BATCH(0);
590
	OUT_BATCH(0);
594
	OUT_BATCH(0);
591
	OUT_BATCH(0);
595
	OUT_BATCH(0);
592
	OUT_BATCH(0);
596
	OUT_BATCH(0);
593
	OUT_BATCH(0);
597
	OUT_BATCH(0);
594
 
598
 
595
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
599
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
596
	OUT_BATCH(0);
600
	OUT_BATCH(0);
597
 
601
 
598
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
602
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
599
	OUT_BATCH(0);
603
	OUT_BATCH(0);
600
#endif
604
#endif
601
}
605
}
602
 
606
 
603
static void
607
static void
604
gen7_disable_streamout(struct sna *sna)
608
gen7_disable_streamout(struct sna *sna)
605
{
609
{
606
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
610
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
607
	OUT_BATCH(0);
611
	OUT_BATCH(0);
608
	OUT_BATCH(0);
612
	OUT_BATCH(0);
609
}
613
}
610
 
614
 
611
static void
615
static void
612
gen7_emit_sf_invariant(struct sna *sna)
616
gen7_emit_sf_invariant(struct sna *sna)
613
{
617
{
614
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
618
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
615
	OUT_BATCH(0);
619
	OUT_BATCH(0);
616
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
620
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
617
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
621
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
618
	OUT_BATCH(0);
622
	OUT_BATCH(0);
619
	OUT_BATCH(0);
623
	OUT_BATCH(0);
620
	OUT_BATCH(0);
624
	OUT_BATCH(0);
621
}
625
}
622
 
626
 
623
static void
627
static void
624
gen7_emit_cc_invariant(struct sna *sna)
628
gen7_emit_cc_invariant(struct sna *sna)
625
{
629
{
626
#if 0 /* unused, no change */
630
#if 0 /* unused, no change */
627
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
631
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
628
	OUT_BATCH(0);
632
	OUT_BATCH(0);
629
 
633
 
630
	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
634
	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
631
	OUT_BATCH(0);
635
	OUT_BATCH(0);
632
#endif
636
#endif
633
 
637
 
634
	/* XXX clear to be safe */
638
	/* XXX clear to be safe */
635
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
639
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
636
	OUT_BATCH(0);
640
	OUT_BATCH(0);
637
}
641
}
638
 
642
 
639
static void
643
static void
640
gen7_disable_clip(struct sna *sna)
644
gen7_disable_clip(struct sna *sna)
641
{
645
{
642
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
646
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
643
	OUT_BATCH(0);
647
	OUT_BATCH(0);
644
	OUT_BATCH(0); /* pass-through */
648
	OUT_BATCH(0); /* pass-through */
645
	OUT_BATCH(0);
649
	OUT_BATCH(0);
646
 
650
 
647
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
651
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
648
	OUT_BATCH(0);
652
	OUT_BATCH(0);
649
}
653
}
650
 
654
 
651
static void
655
static void
652
gen7_emit_wm_invariant(struct sna *sna)
656
gen7_emit_wm_invariant(struct sna *sna)
653
{
657
{
654
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
658
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
655
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
659
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
656
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
660
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
657
	OUT_BATCH(0);
661
	OUT_BATCH(0);
658
 
662
 
659
#if 0
663
#if 0
660
	/* XXX length bias of 7 in old spec? */
664
	/* XXX length bias of 7 in old spec? */
661
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
665
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
662
	OUT_BATCH(0);
666
	OUT_BATCH(0);
663
	OUT_BATCH(0);
667
	OUT_BATCH(0);
664
	OUT_BATCH(0);
668
	OUT_BATCH(0);
665
	OUT_BATCH(0);
669
	OUT_BATCH(0);
666
	OUT_BATCH(0);
670
	OUT_BATCH(0);
667
	OUT_BATCH(0);
671
	OUT_BATCH(0);
668
#endif
672
#endif
669
}
673
}
670
 
674
 
671
static void
675
static void
672
gen7_emit_null_depth_buffer(struct sna *sna)
676
gen7_emit_null_depth_buffer(struct sna *sna)
673
{
677
{
674
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
678
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
675
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
679
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
676
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
680
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
677
	OUT_BATCH(0); /* disable depth, stencil and hiz */
681
	OUT_BATCH(0); /* disable depth, stencil and hiz */
678
	OUT_BATCH(0);
682
	OUT_BATCH(0);
679
	OUT_BATCH(0);
683
	OUT_BATCH(0);
680
	OUT_BATCH(0);
684
	OUT_BATCH(0);
681
	OUT_BATCH(0);
685
	OUT_BATCH(0);
682
 
686
 
683
#if 0
687
#if 0
684
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
688
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
685
	OUT_BATCH(0);
689
	OUT_BATCH(0);
686
	OUT_BATCH(0);
690
	OUT_BATCH(0);
687
#endif
691
#endif
688
}
692
}
689
 
693
 
690
static void
694
static void
691
gen7_emit_invariant(struct sna *sna)
695
gen7_emit_invariant(struct sna *sna)
692
{
696
{
693
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
697
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
694
 
698
 
695
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
699
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
696
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
700
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
697
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
701
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
698
	OUT_BATCH(0);
702
	OUT_BATCH(0);
699
	OUT_BATCH(0);
703
	OUT_BATCH(0);
700
 
704
 
701
	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
705
	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
702
	OUT_BATCH(1);
706
	OUT_BATCH(1);
703
 
707
 
704
	gen7_emit_urb(sna);
708
	gen7_emit_urb(sna);
705
 
709
 
706
	gen7_emit_state_base_address(sna);
710
	gen7_emit_state_base_address(sna);
707
 
711
 
708
	gen7_disable_vs(sna);
712
	gen7_disable_vs(sna);
709
	gen7_disable_hs(sna);
713
	gen7_disable_hs(sna);
710
	gen7_disable_te(sna);
714
	gen7_disable_te(sna);
711
	gen7_disable_ds(sna);
715
	gen7_disable_ds(sna);
712
	gen7_disable_gs(sna);
716
	gen7_disable_gs(sna);
713
	gen7_disable_clip(sna);
717
	gen7_disable_clip(sna);
714
	gen7_emit_sf_invariant(sna);
718
	gen7_emit_sf_invariant(sna);
715
	gen7_emit_wm_invariant(sna);
719
	gen7_emit_wm_invariant(sna);
716
	gen7_emit_cc_invariant(sna);
720
	gen7_emit_cc_invariant(sna);
717
	gen7_disable_streamout(sna);
721
	gen7_disable_streamout(sna);
718
	gen7_emit_null_depth_buffer(sna);
722
	gen7_emit_null_depth_buffer(sna);
719
 
723
 
720
	sna->render_state.gen7.needs_invariant = false;
724
	sna->render_state.gen7.needs_invariant = false;
721
}
725
}
722
 
726
 
723
static void
727
static void
724
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
728
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
725
{
729
{
726
	struct gen7_render_state *render = &sna->render_state.gen7;
730
	struct gen7_render_state *render = &sna->render_state.gen7;
727
 
731
 
728
	if (render->blend == blend_offset)
732
	if (render->blend == blend_offset)
729
		return;
733
		return;
730
 
734
 
731
	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));
735
	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));
732
 
736
 
733
	/* XXX can have upto 8 blend states preload, selectable via
737
	/* XXX can have upto 8 blend states preload, selectable via
734
	 * Render Target Index. What other side-effects of Render Target Index?
738
	 * Render Target Index. What other side-effects of Render Target Index?
735
	 */
739
	 */
736
 
740
 
737
	assert (is_aligned(render->cc_blend + blend_offset, 64));
741
	assert (is_aligned(render->cc_blend + blend_offset, 64));
738
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
742
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
739
	OUT_BATCH((render->cc_blend + blend_offset) | 1);
743
	OUT_BATCH((render->cc_blend + blend_offset) | 1);
740
 
744
 
741
	render->blend = blend_offset;
745
	render->blend = blend_offset;
742
}
746
}
743
 
747
 
744
static void
748
static void
745
gen7_emit_sampler(struct sna *sna, uint32_t state)
749
gen7_emit_sampler(struct sna *sna, uint32_t state)
746
{
750
{
747
	if (sna->render_state.gen7.samplers == state)
751
	if (sna->render_state.gen7.samplers == state)
748
		return;
752
		return;
749
 
753
 
750
	sna->render_state.gen7.samplers = state;
754
	sna->render_state.gen7.samplers = state;
751
 
755
 
752
	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
756
	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
753
 
757
 
754
	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
758
	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
755
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
759
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
756
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
760
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
757
}
761
}
758
 
762
 
759
static void
763
static void
760
gen7_emit_sf(struct sna *sna, bool has_mask)
764
gen7_emit_sf(struct sna *sna, bool has_mask)
761
{
765
{
762
	int num_sf_outputs = has_mask ? 2 : 1;
766
	int num_sf_outputs = has_mask ? 2 : 1;
763
 
767
 
764
	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
768
	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
765
		return;
769
		return;
766
 
770
 
767
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
771
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
768
	     __FUNCTION__, num_sf_outputs, 1, 0));
772
	     __FUNCTION__, num_sf_outputs, 1, 0));
769
 
773
 
770
	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;
774
	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;
771
 
775
 
772
	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
776
	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
773
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
777
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
774
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
778
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
775
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
779
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
776
	OUT_BATCH(0);
780
	OUT_BATCH(0);
777
	OUT_BATCH(0); /* dw4 */
781
	OUT_BATCH(0); /* dw4 */
778
	OUT_BATCH(0);
782
	OUT_BATCH(0);
779
	OUT_BATCH(0);
783
	OUT_BATCH(0);
780
	OUT_BATCH(0);
784
	OUT_BATCH(0);
781
	OUT_BATCH(0); /* dw8 */
785
	OUT_BATCH(0); /* dw8 */
782
	OUT_BATCH(0);
786
	OUT_BATCH(0);
783
	OUT_BATCH(0);
787
	OUT_BATCH(0);
784
	OUT_BATCH(0);
788
	OUT_BATCH(0);
785
	OUT_BATCH(0); /* dw12 */
789
	OUT_BATCH(0); /* dw12 */
786
	OUT_BATCH(0);
790
	OUT_BATCH(0);
787
	OUT_BATCH(0);
791
	OUT_BATCH(0);
788
}
792
}
789
 
793
 
790
static void
794
static void
791
gen7_emit_wm(struct sna *sna, int kernel)
795
gen7_emit_wm(struct sna *sna, int kernel)
792
{
796
{
793
	const uint32_t *kernels;
797
	const uint32_t *kernels;
794
 
798
 
795
	if (sna->render_state.gen7.kernel == kernel)
799
	if (sna->render_state.gen7.kernel == kernel)
796
		return;
800
		return;
797
 
801
 
798
	sna->render_state.gen7.kernel = kernel;
802
	sna->render_state.gen7.kernel = kernel;
799
	kernels = sna->render_state.gen7.wm_kernel[kernel];
803
	kernels = sna->render_state.gen7.wm_kernel[kernel];
800
 
804
 
801
	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
805
	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
802
	     __FUNCTION__,
806
	     __FUNCTION__,
803
	     wm_kernels[kernel].name,
807
	     wm_kernels[kernel].name,
804
	     wm_kernels[kernel].num_surfaces,
808
	     wm_kernels[kernel].num_surfaces,
805
	     kernels[0], kernels[1], kernels[2]));
809
	     kernels[0], kernels[1], kernels[2]));
806
 
810
 
807
	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
811
	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
808
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
812
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
809
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
813
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
810
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
814
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
811
	OUT_BATCH(0); /* scratch address */
815
	OUT_BATCH(0); /* scratch address */
812
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
816
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
813
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
817
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
814
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
818
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
815
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
819
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
816
		  GEN7_PS_ATTRIBUTE_ENABLE);
820
		  GEN7_PS_ATTRIBUTE_ENABLE);
817
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
821
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
818
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
822
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
819
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
823
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
820
	OUT_BATCH(kernels[2]);
824
	OUT_BATCH(kernels[2]);
821
	OUT_BATCH(kernels[1]);
825
	OUT_BATCH(kernels[1]);
822
}
826
}
823
 
827
 
824
static bool
828
static bool
825
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
829
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
826
{
830
{
827
	if (sna->render_state.gen7.surface_table == offset)
831
	if (sna->render_state.gen7.surface_table == offset)
828
		return false;
832
		return false;
829
 
833
 
830
	/* Binding table pointers */
834
	/* Binding table pointers */
831
	assert(is_aligned(4*offset, 32));
835
	assert(is_aligned(4*offset, 32));
832
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
836
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
833
	OUT_BATCH(offset*4);
837
	OUT_BATCH(offset*4);
834
 
838
 
835
	sna->render_state.gen7.surface_table = offset;
839
	sna->render_state.gen7.surface_table = offset;
836
	return true;
840
	return true;
837
}
841
}
838
 
842
 
839
static bool
843
static bool
840
gen7_emit_drawing_rectangle(struct sna *sna,
844
gen7_emit_drawing_rectangle(struct sna *sna,
841
			    const struct sna_composite_op *op)
845
			    const struct sna_composite_op *op)
842
{
846
{
843
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
847
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
844
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
848
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
845
 
849
 
846
	assert(!too_large(op->dst.x, op->dst.y));
850
	assert(!too_large(op->dst.x, op->dst.y));
847
	assert(!too_large(op->dst.width, op->dst.height));
851
	assert(!too_large(op->dst.width, op->dst.height));
848
 
852
 
849
	if (sna->render_state.gen7.drawrect_limit == limit &&
853
	if (sna->render_state.gen7.drawrect_limit == limit &&
850
	    sna->render_state.gen7.drawrect_offset == offset)
854
	    sna->render_state.gen7.drawrect_offset == offset)
851
		return true;
855
		return true;
852
 
856
 
853
	sna->render_state.gen7.drawrect_offset = offset;
857
	sna->render_state.gen7.drawrect_offset = offset;
854
	sna->render_state.gen7.drawrect_limit = limit;
858
	sna->render_state.gen7.drawrect_limit = limit;
855
 
859
 
856
	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
860
	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
857
	OUT_BATCH(0);
861
	OUT_BATCH(0);
858
	OUT_BATCH(limit);
862
	OUT_BATCH(limit);
859
	OUT_BATCH(offset);
863
	OUT_BATCH(offset);
860
	return false;
864
	return false;
861
}
865
}
862
 
866
 
863
static void
867
static void
864
gen7_emit_vertex_elements(struct sna *sna,
868
gen7_emit_vertex_elements(struct sna *sna,
865
			  const struct sna_composite_op *op)
869
			  const struct sna_composite_op *op)
866
{
870
{
867
	/*
871
	/*
868
	 * vertex data in vertex buffer
872
	 * vertex data in vertex buffer
869
	 *    position: (x, y)
873
	 *    position: (x, y)
870
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
874
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
871
	 *    texture coordinate 1 if (has_mask is true): same as above
875
	 *    texture coordinate 1 if (has_mask is true): same as above
872
	 */
876
	 */
873
	struct gen7_render_state *render = &sna->render_state.gen7;
877
	struct gen7_render_state *render = &sna->render_state.gen7;
874
	uint32_t src_format, dw;
878
	uint32_t src_format, dw;
875
	int id = GEN7_VERTEX(op->u.gen7.flags);
879
	int id = GEN7_VERTEX(op->u.gen7.flags);
876
	bool has_mask;
880
	bool has_mask;
877
 
881
 
878
	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
882
	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
879
 
883
 
880
	if (render->ve_id == id)
884
	if (render->ve_id == id)
881
		return;
885
		return;
882
	render->ve_id = id;
886
	render->ve_id = id;
883
 
887
 
884
	/* The VUE layout
888
	/* The VUE layout
885
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
889
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
886
	 *    dword 4-7: position (x, y, 1.0, 1.0),
890
	 *    dword 4-7: position (x, y, 1.0, 1.0),
887
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
891
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
888
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
892
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
889
	 *
893
	 *
890
	 * dword 4-15 are fetched from vertex buffer
894
	 * dword 4-15 are fetched from vertex buffer
891
	 */
895
	 */
892
	has_mask = (id >> 2) != 0;
896
	has_mask = (id >> 2) != 0;
893
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
897
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
894
		((2 * (3 + has_mask)) + 1 - 2));
898
		((2 * (3 + has_mask)) + 1 - 2));
895
 
899
 
896
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
900
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
897
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
901
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
898
		  0 << GEN7_VE0_OFFSET_SHIFT);
902
		  0 << GEN7_VE0_OFFSET_SHIFT);
899
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
903
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
900
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
904
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
901
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
905
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
902
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
906
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
903
 
907
 
904
	/* x,y */
908
	/* x,y */
905
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
909
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
906
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
910
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
907
		  0 << GEN7_VE0_OFFSET_SHIFT);
911
		  0 << GEN7_VE0_OFFSET_SHIFT);
908
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
912
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
909
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
913
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
910
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
914
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
911
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
915
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
912
 
916
 
913
	/* u0, v0, w0 */
917
	/* u0, v0, w0 */
914
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
918
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
915
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
919
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
916
	switch (id & 3) {
920
	switch (id & 3) {
917
	default:
921
	default:
918
		assert(0);
922
		assert(0);
919
	case 0:
923
	case 0:
920
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
924
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
921
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
925
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
922
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
926
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
923
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
927
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
924
		break;
928
		break;
925
	case 1:
929
	case 1:
926
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
930
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
927
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
931
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
928
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
932
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
929
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
933
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
930
		break;
934
		break;
931
	case 2:
935
	case 2:
932
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
936
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
933
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
937
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
934
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
938
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
935
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
939
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
936
		break;
940
		break;
937
	case 3:
941
	case 3:
938
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
942
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
939
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
943
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
940
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
944
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
941
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
945
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
942
		break;
946
		break;
943
	}
947
	}
944
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
948
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
945
		  src_format << GEN7_VE0_FORMAT_SHIFT |
949
		  src_format << GEN7_VE0_FORMAT_SHIFT |
946
		  4 << GEN7_VE0_OFFSET_SHIFT);
950
		  4 << GEN7_VE0_OFFSET_SHIFT);
947
	OUT_BATCH(dw);
951
	OUT_BATCH(dw);
948
 
952
 
949
	/* u1, v1, w1 */
953
	/* u1, v1, w1 */
950
	if (has_mask) {
954
	if (has_mask) {
951
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
955
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
952
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
956
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
953
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
957
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
954
		switch (id >> 2) {
958
		switch (id >> 2) {
955
		case 1:
959
		case 1:
956
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
960
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
957
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
961
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
958
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
962
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
959
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
963
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
960
			break;
964
			break;
961
		default:
965
		default:
962
			assert(0);
966
			assert(0);
963
		case 2:
967
		case 2:
964
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
968
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
965
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
969
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
966
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
970
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
967
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
971
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
968
			break;
972
			break;
969
		case 3:
973
		case 3:
970
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
974
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
971
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
975
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
972
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
976
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
973
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
977
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
974
			break;
978
			break;
975
		}
979
		}
976
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
980
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
977
			  src_format << GEN7_VE0_FORMAT_SHIFT |
981
			  src_format << GEN7_VE0_FORMAT_SHIFT |
978
			  offset << GEN7_VE0_OFFSET_SHIFT);
982
			  offset << GEN7_VE0_OFFSET_SHIFT);
979
		OUT_BATCH(dw);
983
		OUT_BATCH(dw);
980
	}
984
	}
981
}
985
}
982
 
986
 
983
inline static void
987
inline static void
984
gen7_emit_pipe_invalidate(struct sna *sna)
988
gen7_emit_pipe_invalidate(struct sna *sna)
985
{
989
{
986
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
990
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
987
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
991
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
988
		  GEN7_PIPE_CONTROL_TC_FLUSH |
992
		  GEN7_PIPE_CONTROL_TC_FLUSH |
989
		  GEN7_PIPE_CONTROL_CS_STALL);
993
		  GEN7_PIPE_CONTROL_CS_STALL);
990
	OUT_BATCH(0);
994
	OUT_BATCH(0);
991
	OUT_BATCH(0);
995
	OUT_BATCH(0);
992
}
996
}
993
 
997
 
994
inline static void
998
inline static void
995
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
999
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
996
{
1000
{
997
	unsigned stall;
1001
	unsigned stall;
998
 
1002
 
999
	stall = 0;
1003
	stall = 0;
1000
	if (need_stall)
1004
	if (need_stall)
1001
		stall = (GEN7_PIPE_CONTROL_CS_STALL |
1005
		stall = (GEN7_PIPE_CONTROL_CS_STALL |
1002
			 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1006
			 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1003
 
1007
 
1004
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1008
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1005
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
1009
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
1006
	OUT_BATCH(0);
1010
	OUT_BATCH(0);
1007
	OUT_BATCH(0);
1011
	OUT_BATCH(0);
1008
}
1012
}
1009
 
1013
 
1010
inline static void
1014
inline static void
1011
gen7_emit_pipe_stall(struct sna *sna)
1015
gen7_emit_pipe_stall(struct sna *sna)
1012
{
1016
{
1013
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1017
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1014
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
1018
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
1015
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1019
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1016
	OUT_BATCH(0);
1020
	OUT_BATCH(0);
1017
	OUT_BATCH(0);
1021
	OUT_BATCH(0);
1018
}
1022
}
1019
 
1023
 
1020
static void
1024
static void
1021
gen7_emit_state(struct sna *sna,
1025
gen7_emit_state(struct sna *sna,
1022
		const struct sna_composite_op *op,
1026
		const struct sna_composite_op *op,
1023
		uint16_t wm_binding_table)
1027
		uint16_t wm_binding_table)
1024
{
1028
{
-
 
1029
	bool need_invalidate;
-
 
1030
	bool need_flush;
1025
	bool need_stall;
1031
	bool need_stall;
1026
 
1032
 
1027
	assert(op->dst.bo->exec);
1033
	assert(op->dst.bo->exec);
1028
 
1034
 
-
 
1035
	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
1029
	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1036
	if (ALWAYS_INVALIDATE)
-
 
1037
		need_invalidate = true;
-
 
1038
 
1030
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
1039
	need_flush =
1031
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
1040
		sna->render_state.gen7.emit_flush &&
-
 
1041
		wm_binding_table & GEN7_READS_DST(op->u.gen7.flags);
-
 
1042
	if (ALWAYS_FLUSH)
-
 
1043
		need_flush = true;
1032
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1044
 
1033
	gen7_emit_vertex_elements(sna, op);
1045
	wm_binding_table &= ~1;
1034
 
1046
 
-
 
1047
	need_stall = sna->render_state.gen7.surface_table != wm_binding_table;
-
 
1048
	need_stall &= gen7_emit_drawing_rectangle(sna, op);
1035
	need_stall = gen7_emit_binding_table(sna, wm_binding_table);
1049
	if (ALWAYS_STALL)
1036
	need_stall &= gen7_emit_drawing_rectangle(sna, op);
1050
		need_stall = true;
1037
 
1051
 
1038
	if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
1052
	if (need_invalidate) {
1039
		gen7_emit_pipe_invalidate(sna);
1053
		gen7_emit_pipe_invalidate(sna);
1040
		kgem_clear_dirty(&sna->kgem);
1054
		kgem_clear_dirty(&sna->kgem);
1041
		assert(op->dst.bo->exec);
1055
		assert(op->dst.bo->exec);
1042
			kgem_bo_mark_dirty(op->dst.bo);
1056
			kgem_bo_mark_dirty(op->dst.bo);
-
 
1057
 
1043
		sna->render_state.gen7.emit_flush = false;
1058
		need_flush = false;
1044
		need_stall = false;
1059
		need_stall = false;
1045
	}
1060
	}
1046
	if (sna->render_state.gen7.emit_flush) {
1061
	if (need_flush) {
1047
		gen7_emit_pipe_flush(sna, need_stall);
1062
		gen7_emit_pipe_flush(sna, need_stall);
1048
		need_stall = false;
1063
		need_stall = false;
1049
	}
1064
	}
1050
	if (need_stall)
1065
	if (need_stall)
1051
		gen7_emit_pipe_stall(sna);
1066
		gen7_emit_pipe_stall(sna);
-
 
1067
 
-
 
1068
	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
-
 
1069
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
-
 
1070
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
-
 
1071
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
-
 
1072
	gen7_emit_vertex_elements(sna, op);
-
 
1073
	gen7_emit_binding_table(sna, wm_binding_table);
1052
 
1074
 
1053
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
1075
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
1054
}
1076
}
1055
 
1077
 
1056
static bool gen7_magic_ca_pass(struct sna *sna,
1078
static bool gen7_magic_ca_pass(struct sna *sna,
1057
			       const struct sna_composite_op *op)
1079
			       const struct sna_composite_op *op)
1058
{
1080
{
1059
	struct gen7_render_state *state = &sna->render_state.gen7;
1081
	struct gen7_render_state *state = &sna->render_state.gen7;
1060
 
1082
 
1061
	if (!op->need_magic_ca_pass)
1083
	if (!op->need_magic_ca_pass)
1062
		return false;
1084
		return false;
1063
 
1085
 
1064
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
1086
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
1065
	     sna->render.vertex_start, sna->render.vertex_index));
1087
	     sna->render.vertex_start, sna->render.vertex_index));
1066
 
1088
 
1067
	gen7_emit_pipe_stall(sna);
1089
	gen7_emit_pipe_stall(sna);
1068
 
1090
 
1069
	gen7_emit_cc(sna,
1091
	gen7_emit_cc(sna,
1070
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
1092
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
1071
					       op->dst.format)));
1093
					       op->dst.format)));
1072
	gen7_emit_wm(sna,
1094
	gen7_emit_wm(sna,
1073
		     gen7_choose_composite_kernel(PictOpAdd,
1095
		     gen7_choose_composite_kernel(PictOpAdd,
1074
						  true, true,
1096
						  true, true,
1075
						  op->is_affine));
1097
						  op->is_affine));
1076
 
1098
 
1077
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
1099
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
1078
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1100
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1079
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
1101
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
1080
	OUT_BATCH(sna->render.vertex_start);
1102
	OUT_BATCH(sna->render.vertex_start);
1081
	OUT_BATCH(1);	/* single instance */
1103
	OUT_BATCH(1);	/* single instance */
1082
	OUT_BATCH(0);	/* start instance location */
1104
	OUT_BATCH(0);	/* start instance location */
1083
	OUT_BATCH(0);	/* index buffer offset, ignored */
1105
	OUT_BATCH(0);	/* index buffer offset, ignored */
1084
 
1106
 
1085
	state->last_primitive = sna->kgem.nbatch;
1107
	state->last_primitive = sna->kgem.nbatch;
1086
	return true;
1108
	return true;
1087
}
1109
}
1088
 
1110
 
1089
static void null_create(struct sna_static_stream *stream)
1111
static void null_create(struct sna_static_stream *stream)
1090
{
1112
{
1091
	/* A bunch of zeros useful for legacy border color and depth-stencil */
1113
	/* A bunch of zeros useful for legacy border color and depth-stencil */
1092
	sna_static_stream_map(stream, 64, 64);
1114
	sna_static_stream_map(stream, 64, 64);
1093
}
1115
}
1094
 
1116
 
1095
static void
1117
static void
1096
sampler_state_init(struct gen7_sampler_state *sampler_state,
1118
sampler_state_init(struct gen7_sampler_state *sampler_state,
1097
		   sampler_filter_t filter,
1119
		   sampler_filter_t filter,
1098
		   sampler_extend_t extend)
1120
		   sampler_extend_t extend)
1099
{
1121
{
1100
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1122
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1101
 
1123
 
1102
	/* We use the legacy mode to get the semantics specified by
1124
	/* We use the legacy mode to get the semantics specified by
1103
	 * the Render extension. */
1125
	 * the Render extension. */
1104
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
1126
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
1105
 
1127
 
1106
	switch (filter) {
1128
	switch (filter) {
1107
	default:
1129
	default:
1108
	case SAMPLER_FILTER_NEAREST:
1130
	case SAMPLER_FILTER_NEAREST:
1109
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
1131
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
1110
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
1132
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
1111
		break;
1133
		break;
1112
	case SAMPLER_FILTER_BILINEAR:
1134
	case SAMPLER_FILTER_BILINEAR:
1113
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
1135
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
1114
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
1136
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
1115
		break;
1137
		break;
1116
	}
1138
	}
1117
 
1139
 
1118
	switch (extend) {
1140
	switch (extend) {
1119
	default:
1141
	default:
1120
	case SAMPLER_EXTEND_NONE:
1142
	case SAMPLER_EXTEND_NONE:
1121
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1143
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1122
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1144
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1123
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1145
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1124
		break;
1146
		break;
1125
	case SAMPLER_EXTEND_REPEAT:
1147
	case SAMPLER_EXTEND_REPEAT:
1126
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1148
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1127
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1149
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1128
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1150
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1129
		break;
1151
		break;
1130
	case SAMPLER_EXTEND_PAD:
1152
	case SAMPLER_EXTEND_PAD:
1131
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1153
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1132
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1154
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1133
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1155
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1134
		break;
1156
		break;
1135
	case SAMPLER_EXTEND_REFLECT:
1157
	case SAMPLER_EXTEND_REFLECT:
1136
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1158
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1137
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1159
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1138
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1160
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1139
		break;
1161
		break;
1140
	}
1162
	}
1141
}
1163
}
1142
 
1164
 
1143
static void
1165
static void
1144
sampler_copy_init(struct gen7_sampler_state *ss)
1166
sampler_copy_init(struct gen7_sampler_state *ss)
1145
{
1167
{
1146
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1168
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1147
	ss->ss3.non_normalized_coord = 1;
1169
	ss->ss3.non_normalized_coord = 1;
1148
 
1170
 
1149
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1171
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1150
}
1172
}
1151
 
1173
 
1152
static void
1174
static void
1153
sampler_fill_init(struct gen7_sampler_state *ss)
1175
sampler_fill_init(struct gen7_sampler_state *ss)
1154
{
1176
{
1155
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1177
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1156
	ss->ss3.non_normalized_coord = 1;
1178
	ss->ss3.non_normalized_coord = 1;
1157
 
1179
 
1158
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1180
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1159
}
1181
}
1160
 
1182
 
1161
static uint32_t
1183
static uint32_t
1162
gen7_tiling_bits(uint32_t tiling)
1184
gen7_tiling_bits(uint32_t tiling)
1163
{
1185
{
1164
	switch (tiling) {
1186
	switch (tiling) {
1165
	default: assert(0);
1187
	default: assert(0);
1166
	case I915_TILING_NONE: return 0;
1188
	case I915_TILING_NONE: return 0;
1167
	case I915_TILING_X: return GEN7_SURFACE_TILED;
1189
	case I915_TILING_X: return GEN7_SURFACE_TILED;
1168
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
1190
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
1169
	}
1191
	}
1170
}
1192
}
1171
 
1193
 
1172
/**
1194
/**
1173
 * Sets up the common fields for a surface state buffer for the given
1195
 * Sets up the common fields for a surface state buffer for the given
1174
 * picture in the given surface state buffer.
1196
 * picture in the given surface state buffer.
1175
 */
1197
 */
1176
static uint32_t
1198
static uint32_t
1177
gen7_bind_bo(struct sna *sna,
1199
gen7_bind_bo(struct sna *sna,
1178
	     struct kgem_bo *bo,
1200
	     struct kgem_bo *bo,
1179
	     uint32_t width,
1201
	     uint32_t width,
1180
	     uint32_t height,
1202
	     uint32_t height,
1181
	     uint32_t format,
1203
	     uint32_t format,
1182
	     bool is_dst)
1204
	     bool is_dst)
1183
{
1205
{
1184
	uint32_t *ss;
1206
	uint32_t *ss;
1185
	uint32_t domains;
1207
	uint32_t domains;
1186
	int offset;
1208
	int offset;
1187
	uint32_t is_scanout = is_dst && bo->scanout;
1209
	uint32_t is_scanout = is_dst && bo->scanout;
1188
 
1210
 
1189
	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
1211
	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
1190
 
1212
 
1191
	/* After the first bind, we manage the cache domains within the batch */
1213
	/* After the first bind, we manage the cache domains within the batch */
1192
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
1214
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
1193
	if (offset) {
1215
	if (offset) {
1194
		if (is_dst)
1216
		if (is_dst)
1195
			kgem_bo_mark_dirty(bo);
1217
			kgem_bo_mark_dirty(bo);
1196
		return offset * sizeof(uint32_t);
1218
		return offset * sizeof(uint32_t);
1197
	}
1219
	}
1198
 
1220
 
1199
	offset = sna->kgem.surface -=
1221
	offset = sna->kgem.surface -=
1200
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1222
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1201
	ss = sna->kgem.batch + offset;
1223
	ss = sna->kgem.batch + offset;
1202
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1224
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1203
		 gen7_tiling_bits(bo->tiling) |
1225
		 gen7_tiling_bits(bo->tiling) |
1204
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1226
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1205
	if (bo->tiling == I915_TILING_Y)
1227
	if (bo->tiling == I915_TILING_Y)
1206
		ss[0] |= GEN7_SURFACE_VALIGN_4;
1228
		ss[0] |= GEN7_SURFACE_VALIGN_4;
1207
	if (is_dst) {
1229
	if (is_dst) {
1208
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
1230
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
1209
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1231
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1210
	} else
1232
	} else
1211
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1233
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1212
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1234
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1213
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1235
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1214
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1236
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1215
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1237
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1216
	ss[4] = 0;
1238
	ss[4] = 0;
1217
	ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16;
1239
	ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16;
1218
	ss[6] = 0;
1240
	ss[6] = 0;
1219
	ss[7] = 0;
1241
	ss[7] = 0;
1220
	if (is_hsw(sna))
1242
	if (is_hsw(sna))
1221
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1243
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1222
 
1244
 
1223
	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
1245
	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
1224
 
1246
 
1225
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1247
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1226
	     offset, bo->handle, ss[1],
1248
	     offset, bo->handle, ss[1],
1227
	     format, width, height, bo->pitch, bo->tiling,
1249
	     format, width, height, bo->pitch, bo->tiling,
1228
	     domains & 0xffff ? "render" : "sampler"));
1250
	     domains & 0xffff ? "render" : "sampler"));
1229
 
1251
 
1230
	return offset * sizeof(uint32_t);
1252
	return offset * sizeof(uint32_t);
1231
}
1253
}
1232
 
1254
 
1233
static void gen7_emit_vertex_buffer(struct sna *sna,
1255
static void gen7_emit_vertex_buffer(struct sna *sna,
1234
				    const struct sna_composite_op *op)
1256
				    const struct sna_composite_op *op)
1235
{
1257
{
1236
	int id = GEN7_VERTEX(op->u.gen7.flags);
1258
	int id = GEN7_VERTEX(op->u.gen7.flags);
1237
 
1259
 
1238
	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
1260
	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
1239
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
1261
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
1240
		  GEN7_VB0_VERTEXDATA |
1262
		  GEN7_VB0_VERTEXDATA |
1241
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
1263
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
1242
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
1264
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
1243
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
1265
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
1244
	OUT_BATCH(0);
1266
	OUT_BATCH(0);
1245
	OUT_BATCH(~0); /* max address: disabled */
1267
	OUT_BATCH(~0); /* max address: disabled */
1246
	OUT_BATCH(0);
1268
	OUT_BATCH(0);
1247
 
1269
 
1248
	sna->render.vb_id |= 1 << id;
1270
	sna->render.vb_id |= 1 << id;
1249
}
1271
}
1250
 
1272
 
1251
static void gen7_emit_primitive(struct sna *sna)
1273
static void gen7_emit_primitive(struct sna *sna)
1252
{
1274
{
1253
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
1275
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
1254
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
1276
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
1255
		return;
1277
		return;
1256
	}
1278
	}
1257
 
1279
 
1258
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
1280
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
1259
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1281
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1260
	sna->render.vertex_offset = sna->kgem.nbatch;
1282
	sna->render.vertex_offset = sna->kgem.nbatch;
1261
	OUT_BATCH(0);	/* vertex count, to be filled in later */
1283
	OUT_BATCH(0);	/* vertex count, to be filled in later */
1262
	OUT_BATCH(sna->render.vertex_index);
1284
	OUT_BATCH(sna->render.vertex_index);
1263
	OUT_BATCH(1);	/* single instance */
1285
	OUT_BATCH(1);	/* single instance */
1264
	OUT_BATCH(0);	/* start instance location */
1286
	OUT_BATCH(0);	/* start instance location */
1265
	OUT_BATCH(0);	/* index buffer offset, ignored */
1287
	OUT_BATCH(0);	/* index buffer offset, ignored */
1266
	sna->render.vertex_start = sna->render.vertex_index;
1288
	sna->render.vertex_start = sna->render.vertex_index;
1267
 
1289
 
1268
	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
1290
	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
1269
}
1291
}
1270
 
1292
 
1271
static bool gen7_rectangle_begin(struct sna *sna,
1293
static bool gen7_rectangle_begin(struct sna *sna,
1272
				 const struct sna_composite_op *op)
1294
				 const struct sna_composite_op *op)
1273
{
1295
{
1274
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
1296
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
1275
	int ndwords;
1297
	int ndwords;
1276
 
1298
 
1277
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1299
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1278
		return true;
1300
		return true;
1279
 
1301
 
1280
	ndwords = op->need_magic_ca_pass ? 60 : 6;
1302
	ndwords = op->need_magic_ca_pass ? 60 : 6;
1281
	if ((sna->render.vb_id & id) == 0)
1303
	if ((sna->render.vb_id & id) == 0)
1282
		ndwords += 5;
1304
		ndwords += 5;
1283
	if (!kgem_check_batch(&sna->kgem, ndwords))
1305
	if (!kgem_check_batch(&sna->kgem, ndwords))
1284
		return false;
1306
		return false;
1285
 
1307
 
1286
	if ((sna->render.vb_id & id) == 0)
1308
	if ((sna->render.vb_id & id) == 0)
1287
		gen7_emit_vertex_buffer(sna, op);
1309
		gen7_emit_vertex_buffer(sna, op);
1288
 
1310
 
1289
	gen7_emit_primitive(sna);
1311
	gen7_emit_primitive(sna);
1290
	return true;
1312
	return true;
1291
}
1313
}
1292
 
1314
 
1293
static int gen7_get_rectangles__flush(struct sna *sna,
1315
static int gen7_get_rectangles__flush(struct sna *sna,
1294
				      const struct sna_composite_op *op)
1316
				      const struct sna_composite_op *op)
1295
{
1317
{
1296
	/* Preventing discarding new vbo after lock contention */
1318
	/* Preventing discarding new vbo after lock contention */
1297
	if (sna_vertex_wait__locked(&sna->render)) {
1319
	if (sna_vertex_wait__locked(&sna->render)) {
1298
		int rem = vertex_space(sna);
1320
		int rem = vertex_space(sna);
1299
		if (rem > op->floats_per_rect)
1321
		if (rem > op->floats_per_rect)
1300
			return rem;
1322
			return rem;
1301
	}
1323
	}
1302
 
1324
 
1303
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
1325
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
1304
		return 0;
1326
		return 0;
1305
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
1327
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
1306
		return 0;
1328
		return 0;
1307
 
1329
 
1308
	if (sna->render.vertex_offset) {
1330
	if (sna->render.vertex_offset) {
1309
		gen4_vertex_flush(sna);
1331
		gen4_vertex_flush(sna);
1310
		if (gen7_magic_ca_pass(sna, op)) {
1332
		if (gen7_magic_ca_pass(sna, op)) {
1311
			gen7_emit_pipe_stall(sna);
1333
			gen7_emit_pipe_stall(sna);
1312
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1334
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1313
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1335
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1314
		}
1336
		}
1315
	}
1337
	}
1316
 
1338
 
1317
	return gen4_vertex_finish(sna);
1339
	return gen4_vertex_finish(sna);
1318
}
1340
}
1319
 
1341
 
1320
inline static int gen7_get_rectangles(struct sna *sna,
1342
inline static int gen7_get_rectangles(struct sna *sna,
1321
				      const struct sna_composite_op *op,
1343
				      const struct sna_composite_op *op,
1322
				      int want,
1344
				      int want,
1323
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
1345
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
1324
{
1346
{
1325
	int rem;
1347
	int rem;
1326
 
1348
 
1327
	assert(want);
1349
	assert(want);
1328
 
1350
 
1329
start:
1351
start:
1330
	rem = vertex_space(sna);
1352
	rem = vertex_space(sna);
1331
	if (unlikely(rem < op->floats_per_rect)) {
1353
	if (unlikely(rem < op->floats_per_rect)) {
1332
		DBG(("flushing vbo for %s: %d < %d\n",
1354
		DBG(("flushing vbo for %s: %d < %d\n",
1333
		     __FUNCTION__, rem, op->floats_per_rect));
1355
		     __FUNCTION__, rem, op->floats_per_rect));
1334
		rem = gen7_get_rectangles__flush(sna, op);
1356
		rem = gen7_get_rectangles__flush(sna, op);
1335
		if (unlikely(rem == 0))
1357
		if (unlikely(rem == 0))
1336
			goto flush;
1358
			goto flush;
1337
	}
1359
	}
1338
 
1360
 
1339
	if (unlikely(sna->render.vertex_offset == 0)) {
1361
	if (unlikely(sna->render.vertex_offset == 0)) {
1340
		if (!gen7_rectangle_begin(sna, op))
1362
		if (!gen7_rectangle_begin(sna, op))
1341
			goto flush;
1363
			goto flush;
1342
		else
1364
		else
1343
			goto start;
1365
			goto start;
1344
	}
1366
	}
1345
 
1367
 
1346
	assert(rem <= vertex_space(sna));
1368
	assert(rem <= vertex_space(sna));
1347
	assert(op->floats_per_rect <= rem);
1369
	assert(op->floats_per_rect <= rem);
1348
	if (want > 1 && want * op->floats_per_rect > rem)
1370
	if (want > 1 && want * op->floats_per_rect > rem)
1349
		want = rem / op->floats_per_rect;
1371
		want = rem / op->floats_per_rect;
1350
 
1372
 
1351
	assert(want > 0);
1373
	assert(want > 0);
1352
	sna->render.vertex_index += 3*want;
1374
	sna->render.vertex_index += 3*want;
1353
	return want;
1375
	return want;
1354
 
1376
 
1355
flush:
1377
flush:
1356
	if (sna->render.vertex_offset) {
1378
	if (sna->render.vertex_offset) {
1357
		gen4_vertex_flush(sna);
1379
		gen4_vertex_flush(sna);
1358
		gen7_magic_ca_pass(sna, op);
1380
		gen7_magic_ca_pass(sna, op);
1359
	}
1381
	}
1360
	sna_vertex_wait__locked(&sna->render);
1382
	sna_vertex_wait__locked(&sna->render);
1361
	_kgem_submit(&sna->kgem);
1383
	_kgem_submit(&sna->kgem);
1362
	emit_state(sna, op);
1384
	emit_state(sna, op);
1363
	goto start;
1385
	goto start;
1364
}
1386
}
1365
 
1387
 
1366
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
1388
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
1367
							 uint16_t *offset)
1389
							 uint16_t *offset)
1368
{
1390
{
1369
	uint32_t *table;
1391
	uint32_t *table;
1370
 
1392
 
1371
	sna->kgem.surface -=
1393
	sna->kgem.surface -=
1372
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1394
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1373
	/* Clear all surplus entries to zero in case of prefetch */
1395
	/* Clear all surplus entries to zero in case of prefetch */
1374
	table = memset(sna->kgem.batch + sna->kgem.surface,
1396
	table = memset(sna->kgem.batch + sna->kgem.surface,
1375
		       0, sizeof(struct gen7_surface_state));
1397
		       0, sizeof(struct gen7_surface_state));
1376
 
1398
 
1377
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1399
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1378
 
1400
 
1379
	*offset = sna->kgem.surface;
1401
	*offset = sna->kgem.surface;
1380
	return table;
1402
	return table;
1381
}
1403
}
1382
 
1404
 
1383
static void
1405
static void
1384
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
1406
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
1385
{
1407
{
1386
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1408
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1387
 
1409
 
1388
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
1410
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
1389
		DBG(("%s: flushing batch: %d < %d+%d\n",
1411
		DBG(("%s: flushing batch: %d < %d+%d\n",
1390
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1412
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1391
		     150, 4*8));
1413
		     150, 4*8));
1392
		_kgem_submit(&sna->kgem);
1414
		_kgem_submit(&sna->kgem);
1393
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1415
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1394
	}
1416
	}
1395
 
1417
 
1396
	assert(sna->kgem.mode == KGEM_RENDER);
1418
	assert(sna->kgem.mode == KGEM_RENDER);
1397
	assert(sna->kgem.ring == KGEM_RENDER);
1419
	assert(sna->kgem.ring == KGEM_RENDER);
1398
 
1420
 
1399
	if (sna->render_state.gen7.needs_invariant)
1421
	if (sna->render_state.gen7.needs_invariant)
1400
		gen7_emit_invariant(sna);
1422
		gen7_emit_invariant(sna);
1401
}
1423
}
1402
 
1424
 
1403
static void gen7_emit_composite_state(struct sna *sna,
1425
static void gen7_emit_composite_state(struct sna *sna,
1404
				      const struct sna_composite_op *op)
1426
				      const struct sna_composite_op *op)
1405
{
1427
{
1406
	uint32_t *binding_table;
1428
	uint32_t *binding_table;
1407
	uint16_t offset;
1429
	uint16_t offset, dirty;
1408
 
1430
 
1409
	gen7_get_batch(sna, op);
1431
	gen7_get_batch(sna, op);
1410
 
1432
 
1411
	binding_table = gen7_composite_get_binding_table(sna, &offset);
1433
	binding_table = gen7_composite_get_binding_table(sna, &offset);
-
 
1434
 
-
 
1435
	dirty = kgem_bo_is_dirty(op->dst.bo);
1412
 
1436
 
1413
	binding_table[0] =
1437
	binding_table[0] =
1414
		gen7_bind_bo(sna,
1438
		gen7_bind_bo(sna,
1415
			    op->dst.bo, op->dst.width, op->dst.height,
1439
			    op->dst.bo, op->dst.width, op->dst.height,
1416
			    gen7_get_dest_format(op->dst.format),
1440
			    gen7_get_dest_format(op->dst.format),
1417
			    true);
1441
			    true);
1418
	binding_table[1] =
1442
	binding_table[1] =
1419
		gen7_bind_bo(sna,
1443
		gen7_bind_bo(sna,
1420
			     op->src.bo, op->src.width, op->src.height,
1444
			     op->src.bo, op->src.width, op->src.height,
1421
			     op->src.card_format,
1445
			     op->src.card_format,
1422
			     false);
1446
			     false);
1423
	if (op->mask.bo) {
1447
	if (op->mask.bo) {
1424
		binding_table[2] =
1448
		binding_table[2] =
1425
			gen7_bind_bo(sna,
1449
			gen7_bind_bo(sna,
1426
				     op->mask.bo,
1450
				     op->mask.bo,
1427
				     op->mask.width,
1451
				     op->mask.width,
1428
				     op->mask.height,
1452
				     op->mask.height,
1429
				     op->mask.card_format,
1453
				     op->mask.card_format,
1430
				     false);
1454
				     false);
1431
	}
1455
	}
1432
 
1456
 
1433
	if (sna->kgem.surface == offset &&
1457
	if (sna->kgem.surface == offset &&
1434
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
1458
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
1435
	    (op->mask.bo == NULL ||
1459
	    (op->mask.bo == NULL ||
1436
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
1460
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
1437
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1461
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1438
		offset = sna->render_state.gen7.surface_table;
1462
		offset = sna->render_state.gen7.surface_table;
1439
	}
1463
	}
1440
 
1464
 
1441
	gen7_emit_state(sna, op, offset);
1465
	gen7_emit_state(sna, op, offset | dirty);
1442
}
1466
}
1443
 
1467
 
1444
static void
1468
static void
1445
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1469
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1446
{
1470
{
1447
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
1471
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
1448
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
-
 
1449
			gen4_vertex_finish(sna);
-
 
1450
 
-
 
1451
		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
1472
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
1452
		     sna->render_state.gen7.floats_per_vertex,
1473
		     sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
1453
		     op->floats_per_vertex,
-
 
1454
		     sna->render.vertex_index,
1474
		gen4_vertex_align(sna, op);
1455
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
-
 
1456
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
-
 
1457
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
-
 
1458
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
1475
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
1459
	}
1476
	}
1460
}
1477
}
1461
 
1478
 
1462
fastcall static void
1479
fastcall static void
1463
gen7_render_composite_blt(struct sna *sna,
1480
gen7_render_composite_blt(struct sna *sna,
1464
			  const struct sna_composite_op *op,
1481
			  const struct sna_composite_op *op,
1465
			  const struct sna_composite_rectangles *r)
1482
			  const struct sna_composite_rectangles *r)
1466
{
1483
{
1467
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
1484
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
1468
	op->prim_emit(sna, op, r);
1485
	op->prim_emit(sna, op, r);
1469
}
1486
}
1470
static uint32_t
1487
static uint32_t
1471
gen7_composite_create_blend_state(struct sna_static_stream *stream)
1488
gen7_composite_create_blend_state(struct sna_static_stream *stream)
1472
{
1489
{
1473
	char *base, *ptr;
1490
	char *base, *ptr;
1474
	int src, dst;
1491
	int src, dst;
1475
 
1492
 
1476
	base = sna_static_stream_map(stream,
1493
	base = sna_static_stream_map(stream,
1477
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
1494
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
1478
				     64);
1495
				     64);
1479
 
1496
 
1480
	ptr = base;
1497
	ptr = base;
1481
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
1498
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
1482
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
1499
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
1483
			struct gen7_blend_state *blend =
1500
			struct gen7_blend_state *blend =
1484
				(struct gen7_blend_state *)ptr;
1501
				(struct gen7_blend_state *)ptr;
1485
 
1502
 
1486
			blend->blend0.dest_blend_factor = dst;
1503
			blend->blend0.dest_blend_factor = dst;
1487
			blend->blend0.source_blend_factor = src;
1504
			blend->blend0.source_blend_factor = src;
1488
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
1505
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
1489
			blend->blend0.blend_enable =
1506
			blend->blend0.blend_enable =
1490
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
1507
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
1491
 
1508
 
1492
			blend->blend1.post_blend_clamp_enable = 1;
1509
			blend->blend1.post_blend_clamp_enable = 1;
1493
			blend->blend1.pre_blend_clamp_enable = 1;
1510
			blend->blend1.pre_blend_clamp_enable = 1;
1494
 
1511
 
1495
			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
1512
			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
1496
		}
1513
		}
1497
	}
1514
	}
1498
 
1515
 
1499
	return sna_static_stream_offsetof(stream, base);
1516
	return sna_static_stream_offsetof(stream, base);
1500
}
1517
}
1501
 
1518
 
1502
#if 0
1519
#if 0
1503
static uint32_t gen7_bind_video_source(struct sna *sna,
1520
static uint32_t gen7_bind_video_source(struct sna *sna,
1504
				       struct kgem_bo *bo,
1521
				       struct kgem_bo *bo,
1505
				       uint32_t offset,
1522
				       uint32_t offset,
1506
				       int width,
1523
				       int width,
1507
				       int height,
1524
				       int height,
1508
				       int pitch,
1525
				       int pitch,
1509
				       uint32_t format)
1526
				       uint32_t format)
1510
{
1527
{
1511
	uint32_t *ss, bind;
1528
	uint32_t *ss, bind;
1512
 
1529
 
1513
	bind = sna->kgem.surface -=
1530
	bind = sna->kgem.surface -=
1514
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1531
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1515
 
1532
 
1516
	assert(bo->tiling == I915_TILING_NONE);
1533
	assert(bo->tiling == I915_TILING_NONE);
1517
 
1534
 
1518
	ss = sna->kgem.batch + bind;
1535
	ss = sna->kgem.batch + bind;
1519
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1536
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1520
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1537
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1521
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
1538
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
1522
			       I915_GEM_DOMAIN_SAMPLER << 16,
1539
			       I915_GEM_DOMAIN_SAMPLER << 16,
1523
			       offset);
1540
			       offset);
1524
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1541
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1525
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1542
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1526
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1543
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1527
	ss[4] = 0;
1544
	ss[4] = 0;
1528
	ss[5] = 0;
1545
	ss[5] = 0;
1529
	ss[6] = 0;
1546
	ss[6] = 0;
1530
	ss[7] = 0;
1547
	ss[7] = 0;
1531
	if (is_hsw(sna))
1548
	if (is_hsw(sna))
1532
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1549
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1533
 
1550
 
1534
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
1551
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
1535
	     bind, bo->handle, ss[1],
1552
	     bind, bo->handle, ss[1],
1536
	     format, width, height, pitch, offset));
1553
	     format, width, height, pitch, offset));
1537
 
1554
 
1538
	return bind * sizeof(uint32_t);
1555
	return bind * sizeof(uint32_t);
1539
}
1556
}
1540
 
1557
 
1541
static void gen7_emit_video_state(struct sna *sna,
1558
static void gen7_emit_video_state(struct sna *sna,
1542
				  const struct sna_composite_op *op)
1559
				  const struct sna_composite_op *op)
1543
{
1560
{
1544
	struct sna_video_frame *frame = op->priv;
1561
	struct sna_video_frame *frame = op->priv;
1545
	uint32_t src_surf_format;
1562
	uint32_t src_surf_format;
1546
	uint32_t src_surf_base[6];
1563
	uint32_t src_surf_base[6];
1547
	int src_width[6];
1564
	int src_width[6];
1548
	int src_height[6];
1565
	int src_height[6];
1549
	int src_pitch[6];
1566
	int src_pitch[6];
1550
	uint32_t *binding_table;
1567
	uint32_t *binding_table;
1551
	uint16_t offset;
1568
	uint16_t offset, dirty;
1552
	int n_src, n;
1569
	int n_src, n;
1553
 
1570
 
1554
	gen7_get_batch(sna, op);
1571
	gen7_get_batch(sna, op);
1555
 
1572
 
1556
	src_surf_base[0] = 0;
1573
	src_surf_base[0] = 0;
1557
	src_surf_base[1] = 0;
1574
	src_surf_base[1] = 0;
1558
	src_surf_base[2] = frame->VBufOffset;
1575
	src_surf_base[2] = frame->VBufOffset;
1559
	src_surf_base[3] = frame->VBufOffset;
1576
	src_surf_base[3] = frame->VBufOffset;
1560
	src_surf_base[4] = frame->UBufOffset;
1577
	src_surf_base[4] = frame->UBufOffset;
1561
	src_surf_base[5] = frame->UBufOffset;
1578
	src_surf_base[5] = frame->UBufOffset;
1562
 
1579
 
1563
	if (is_planar_fourcc(frame->id)) {
1580
	if (is_planar_fourcc(frame->id)) {
1564
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
1581
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
1565
		src_width[1]  = src_width[0]  = frame->width;
1582
		src_width[1]  = src_width[0]  = frame->width;
1566
		src_height[1] = src_height[0] = frame->height;
1583
		src_height[1] = src_height[0] = frame->height;
1567
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1584
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1568
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1585
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1569
			frame->width / 2;
1586
			frame->width / 2;
1570
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1587
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1571
			frame->height / 2;
1588
			frame->height / 2;
1572
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1589
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1573
			frame->pitch[0];
1590
			frame->pitch[0];
1574
		n_src = 6;
1591
		n_src = 6;
1575
	} else {
1592
	} else {
1576
		if (frame->id == FOURCC_UYVY)
1593
		if (frame->id == FOURCC_UYVY)
1577
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
1594
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
1578
		else
1595
		else
1579
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
1596
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
1580
 
1597
 
1581
		src_width[0]  = frame->width;
1598
		src_width[0]  = frame->width;
1582
		src_height[0] = frame->height;
1599
		src_height[0] = frame->height;
1583
		src_pitch[0]  = frame->pitch[0];
1600
		src_pitch[0]  = frame->pitch[0];
1584
		n_src = 1;
1601
		n_src = 1;
1585
	}
1602
	}
1586
 
1603
 
1587
	binding_table = gen7_composite_get_binding_table(sna, &offset);
1604
	binding_table = gen7_composite_get_binding_table(sna, &offset);
-
 
1605
 
-
 
1606
	dirty = kgem_bo_is_dirty(op->dst.bo);
1588
 
1607
 
1589
	binding_table[0] =
1608
	binding_table[0] =
1590
		gen7_bind_bo(sna,
1609
		gen7_bind_bo(sna,
1591
			     op->dst.bo, op->dst.width, op->dst.height,
1610
			     op->dst.bo, op->dst.width, op->dst.height,
1592
			     gen7_get_dest_format(op->dst.format),
1611
			     gen7_get_dest_format(op->dst.format),
1593
			     true);
1612
			     true);
1594
	for (n = 0; n < n_src; n++) {
1613
	for (n = 0; n < n_src; n++) {
1595
		binding_table[1+n] =
1614
		binding_table[1+n] =
1596
			gen7_bind_video_source(sna,
1615
			gen7_bind_video_source(sna,
1597
					       frame->bo,
1616
					       frame->bo,
1598
					       src_surf_base[n],
1617
					       src_surf_base[n],
1599
					       src_width[n],
1618
					       src_width[n],
1600
					       src_height[n],
1619
					       src_height[n],
1601
					       src_pitch[n],
1620
					       src_pitch[n],
1602
					       src_surf_format);
1621
					       src_surf_format);
1603
	}
1622
	}
1604
 
1623
 
1605
	gen7_emit_state(sna, op, offset);
1624
	gen7_emit_state(sna, op, offset | dirty);
1606
}
1625
}
1607
 
1626
 
1608
static bool
1627
static bool
1609
gen7_render_video(struct sna *sna,
1628
gen7_render_video(struct sna *sna,
1610
		  struct sna_video *video,
1629
		  struct sna_video *video,
1611
		  struct sna_video_frame *frame,
1630
		  struct sna_video_frame *frame,
1612
		  RegionPtr dstRegion,
1631
		  RegionPtr dstRegion,
1613
		  PixmapPtr pixmap)
1632
		  PixmapPtr pixmap)
1614
{
1633
{
1615
	struct sna_composite_op tmp;
1634
	struct sna_composite_op tmp;
1616
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1635
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1617
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1636
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1618
	int src_width = frame->src.x2 - frame->src.x1;
1637
	int src_width = frame->src.x2 - frame->src.x1;
1619
	int src_height = frame->src.y2 - frame->src.y1;
1638
	int src_height = frame->src.y2 - frame->src.y1;
1620
	float src_offset_x, src_offset_y;
1639
	float src_offset_x, src_offset_y;
1621
	float src_scale_x, src_scale_y;
1640
	float src_scale_x, src_scale_y;
1622
	int nbox, pix_xoff, pix_yoff;
1641
	int nbox, pix_xoff, pix_yoff;
1623
	struct sna_pixmap *priv;
1642
	struct sna_pixmap *priv;
1624
	unsigned filter;
1643
	unsigned filter;
1625
	BoxPtr box;
1644
	BoxPtr box;
1626
 
1645
 
1627
	DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
1646
	DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
1628
	     __FUNCTION__,
1647
	     __FUNCTION__,
1629
	     src_width, src_height, dst_width, dst_height,
1648
	     src_width, src_height, dst_width, dst_height,
1630
	     (long)REGION_NUM_RECTS(dstRegion),
1649
	     (long)REGION_NUM_RECTS(dstRegion),
1631
	     REGION_EXTENTS(NULL, dstRegion)->x1,
1650
	     REGION_EXTENTS(NULL, dstRegion)->x1,
1632
	     REGION_EXTENTS(NULL, dstRegion)->y1,
1651
	     REGION_EXTENTS(NULL, dstRegion)->y1,
1633
	     REGION_EXTENTS(NULL, dstRegion)->x2,
1652
	     REGION_EXTENTS(NULL, dstRegion)->x2,
1634
	     REGION_EXTENTS(NULL, dstRegion)->y2));
1653
	     REGION_EXTENTS(NULL, dstRegion)->y2));
1635
 
1654
 
1636
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1655
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1637
	if (priv == NULL)
1656
	if (priv == NULL)
1638
		return false;
1657
		return false;
1639
 
1658
 
1640
	memset(&tmp, 0, sizeof(tmp));
1659
	memset(&tmp, 0, sizeof(tmp));
1641
 
1660
 
1642
	tmp.dst.pixmap = pixmap;
1661
	tmp.dst.pixmap = pixmap;
1643
	tmp.dst.width  = pixmap->drawable.width;
1662
	tmp.dst.width  = pixmap->drawable.width;
1644
	tmp.dst.height = pixmap->drawable.height;
1663
	tmp.dst.height = pixmap->drawable.height;
1645
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
1664
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
1646
	tmp.dst.bo = priv->gpu_bo;
1665
	tmp.dst.bo = priv->gpu_bo;
1647
 
1666
 
1648
	tmp.src.bo = frame->bo;
1667
	tmp.src.bo = frame->bo;
1649
	tmp.mask.bo = NULL;
1668
	tmp.mask.bo = NULL;
1650
 
1669
 
1651
	tmp.floats_per_vertex = 3;
1670
	tmp.floats_per_vertex = 3;
1652
	tmp.floats_per_rect = 9;
1671
	tmp.floats_per_rect = 9;
1653
 
1672
 
1654
	if (src_width == dst_width && src_height == dst_height)
1673
	if (src_width == dst_width && src_height == dst_height)
1655
		filter = SAMPLER_FILTER_NEAREST;
1674
		filter = SAMPLER_FILTER_NEAREST;
1656
	else
1675
	else
1657
		filter = SAMPLER_FILTER_BILINEAR;
1676
		filter = SAMPLER_FILTER_BILINEAR;
1658
 
1677
 
1659
	tmp.u.gen7.flags =
1678
	tmp.u.gen7.flags =
1660
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
1679
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
1661
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
1680
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
1662
			       NO_BLEND,
1681
			       NO_BLEND,
1663
			       is_planar_fourcc(frame->id) ?
1682
			       is_planar_fourcc(frame->id) ?
1664
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
1683
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
1665
			       GEN7_WM_KERNEL_VIDEO_PACKED,
1684
			       GEN7_WM_KERNEL_VIDEO_PACKED,
1666
			       2);
1685
			       2);
1667
	tmp.priv = frame;
1686
	tmp.priv = frame;
1668
 
1687
 
1669
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
1688
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
1670
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1689
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1671
		kgem_submit(&sna->kgem);
1690
		kgem_submit(&sna->kgem);
1672
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
1691
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
-
 
1692
			return false;
-
 
1693
 
1673
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1694
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1674
	}
1695
	}
1675
 
-
 
1676
	gen7_emit_video_state(sna, &tmp);
1696
 
-
 
1697
	gen7_align_vertex(sna, &tmp);
1677
	gen7_align_vertex(sna, &tmp);
1698
	gen7_emit_video_state(sna, &tmp);
1678
 
1699
 
1679
	/* Set up the offset for translating from the given region (in screen
1700
	/* Set up the offset for translating from the given region (in screen
1680
	 * coordinates) to the backing pixmap.
1701
	 * coordinates) to the backing pixmap.
1681
	 */
1702
	 */
1682
#ifdef COMPOSITE
1703
#ifdef COMPOSITE
1683
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1704
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1684
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1705
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1685
#else
1706
#else
1686
	pix_xoff = 0;
1707
	pix_xoff = 0;
1687
	pix_yoff = 0;
1708
	pix_yoff = 0;
1688
#endif
1709
#endif
1689
 
1710
 
1690
	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
1711
	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
1691
	     __FUNCTION__,
1712
	     __FUNCTION__,
1692
	     frame->src.x1, frame->src.y1,
1713
	     frame->src.x1, frame->src.y1,
1693
	     src_width, src_height,
1714
	     src_width, src_height,
1694
	     dst_width, dst_height,
1715
	     dst_width, dst_height,
1695
	     frame->width, frame->height));
1716
	     frame->width, frame->height));
1696
 
1717
 
1697
	src_scale_x = (float)src_width / dst_width / frame->width;
1718
	src_scale_x = (float)src_width / dst_width / frame->width;
1698
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1719
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1699
 
1720
 
1700
	src_scale_y = (float)src_height / dst_height / frame->height;
1721
	src_scale_y = (float)src_height / dst_height / frame->height;
1701
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1722
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1702
 
1723
 
1703
	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
1724
	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
1704
	     __FUNCTION__,
1725
	     __FUNCTION__,
1705
	     src_scale_x, src_scale_y,
1726
	     src_scale_x, src_scale_y,
1706
	     src_offset_x, src_offset_y));
1727
	     src_offset_x, src_offset_y));
1707
 
1728
 
1708
	box = REGION_RECTS(dstRegion);
1729
	box = REGION_RECTS(dstRegion);
1709
	nbox = REGION_NUM_RECTS(dstRegion);
1730
	nbox = REGION_NUM_RECTS(dstRegion);
1710
	while (nbox--) {
1731
	while (nbox--) {
1711
		BoxRec r;
1732
		BoxRec r;
1712
 
1733
 
1713
		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
1734
		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
1714
		     __FUNCTION__,
1735
		     __FUNCTION__,
1715
		     box->x1, box->y1,
1736
		     box->x1, box->y1,
1716
		     box->x2, box->y2,
1737
		     box->x2, box->y2,
1717
		     pix_xoff, pix_yoff,
1738
		     pix_xoff, pix_yoff,
1718
		     box->x1 * src_scale_x + src_offset_x,
1739
		     box->x1 * src_scale_x + src_offset_x,
1719
		     box->y1 * src_scale_y + src_offset_y,
1740
		     box->y1 * src_scale_y + src_offset_y,
1720
		     box->x2 * src_scale_x + src_offset_x,
1741
		     box->x2 * src_scale_x + src_offset_x,
1721
		     box->y2 * src_scale_y + src_offset_y));
1742
		     box->y2 * src_scale_y + src_offset_y));
1722
 
1743
 
1723
		r.x1 = box->x1 + pix_xoff;
1744
		r.x1 = box->x1 + pix_xoff;
1724
		r.x2 = box->x2 + pix_xoff;
1745
		r.x2 = box->x2 + pix_xoff;
1725
		r.y1 = box->y1 + pix_yoff;
1746
		r.y1 = box->y1 + pix_yoff;
1726
		r.y2 = box->y2 + pix_yoff;
1747
		r.y2 = box->y2 + pix_yoff;
1727
 
1748
 
1728
		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
1749
		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
1729
 
1750
 
1730
		OUT_VERTEX(r.x2, r.y2);
1751
		OUT_VERTEX(r.x2, r.y2);
1731
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1752
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1732
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1753
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1733
 
1754
 
1734
		OUT_VERTEX(r.x1, r.y2);
1755
		OUT_VERTEX(r.x1, r.y2);
1735
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1756
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1736
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1757
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1737
 
1758
 
1738
		OUT_VERTEX(r.x1, r.y1);
1759
		OUT_VERTEX(r.x1, r.y1);
1739
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1760
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1740
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1761
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1741
 
1762
 
1742
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1763
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1743
			sna_damage_add_box(&priv->gpu_damage, &r);
1764
			sna_damage_add_box(&priv->gpu_damage, &r);
1744
			sna_damage_subtract_box(&priv->cpu_damage, &r);
1765
			sna_damage_subtract_box(&priv->cpu_damage, &r);
1745
		}
1766
		}
1746
		box++;
1767
		box++;
1747
	}
1768
	}
1748
 
1769
 
1749
	gen4_vertex_flush(sna);
1770
	gen4_vertex_flush(sna);
1750
	return true;
1771
	return true;
1751
}
1772
}
1752
#endif
1773
#endif
1753
 
1774
 
1754
static void gen7_render_composite_done(struct sna *sna,
1775
static void gen7_render_composite_done(struct sna *sna,
1755
				       const struct sna_composite_op *op)
1776
				       const struct sna_composite_op *op)
1756
{
1777
{
1757
	if (sna->render.vertex_offset) {
1778
	if (sna->render.vertex_offset) {
1758
		gen4_vertex_flush(sna);
1779
		gen4_vertex_flush(sna);
1759
		gen7_magic_ca_pass(sna, op);
1780
		gen7_magic_ca_pass(sna, op);
1760
	}
1781
	}
1761
}
1782
}
1762
 
1783
 
1763
 
1784
 
1764
 
1785
 
1765
 
1786
 
1766
 
1787
 
1767
 
1788
 
1768
 
1789
 
1769
 
1790
 
1770
 
1791
 
1771
 
1792
 
1772
 
1793
 
1773
 
1794
 
1774
 
1795
 
1775
 
1796
 
1776
 
1797
 
1777
 
1798
 
1778
 
1799
 
1779
 
1800
 
1780
 
1801
 
1781
 
1802
 
1782
 
1803
 
1783
 
1804
 
1784
 
1805
 
1785
 
1806
 
1786
 
1807
 
1787
 
1808
 
1788
 
1809
 
1789
 
1810
 
1790
 
1811
 
1791
 
1812
 
1792
 
1813
 
1793
 
1814
 
1794
 
1815
 
1795
 
1816
 
1796
 
1817
 
1797
 
1818
 
1798
 
1819
 
1799
 
1820
 
1800
 
1821
 
1801
 
1822
 
1802
 
1823
 
1803
 
1824
 
1804
 
1825
 
1805
 
1826
 
1806
 
1827
 
1807
 
1828
 
1808
 
1829
 
1809
 
1830
 
1810
 
1831
 
1811
 
1832
 
1812
 
1833
 
1813
 
1834
 
1814
 
1835
 
1815
 
1836
 
1816
 
1837
 
1817
 
1838
 
1818
 
1839
 
1819
 
1840
 
1820
 
1841
 
1821
 
1842
 
1822
 
1843
 
1823
 
1844
 
1824
 
1845
 
1825
 
1846
 
1826
 
1847
 
1827
 
1848
 
1828
 
1849
 
1829
 
1850
 
1830
 
1851
 
1831
 
1852
 
1832
 
1853
 
1833
 
1854
 
1834
 
1855
 
1835
 
1856
 
1836
 
1857
 
1837
 
1858
 
1838
 
1859
 
1839
 
1860
 
1840
 
1861
 
1841
 
1862
 
1842
 
1863
 
1843
 
1864
 
1844
 
1865
 
1845
 
1866
 
1846
 
1867
 
1847
 
1868
 
1848
 
1869
 
1849
 
1870
 
1850
 
1871
 
1851
 
1872
 
1852
 
1873
 
1853
 
1874
 
1854
 
1875
 
1855
#if 0
1876
#if 0
1856
static bool
1877
static bool
1857
gen7_render_fill_boxes(struct sna *sna,
1878
gen7_render_fill_boxes(struct sna *sna,
1858
		       CARD8 op,
1879
		       CARD8 op,
1859
		       PictFormat format,
1880
		       PictFormat format,
1860
		       const xRenderColor *color,
1881
		       const xRenderColor *color,
1861
		       PixmapPtr dst, struct kgem_bo *dst_bo,
1882
		       PixmapPtr dst, struct kgem_bo *dst_bo,
1862
		       const BoxRec *box, int n)
1883
		       const BoxRec *box, int n)
1863
{
1884
{
1864
	struct sna_composite_op tmp;
1885
	struct sna_composite_op tmp;
1865
	uint32_t pixel;
1886
	uint32_t pixel;
1866
 
1887
 
1867
	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
1888
	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
1868
	     __FUNCTION__, op,
1889
	     __FUNCTION__, op,
1869
	     color->red, color->green, color->blue, color->alpha, (int)format));
1890
	     color->red, color->green, color->blue, color->alpha, (int)format));
1870
 
1891
 
1871
	if (op >= ARRAY_SIZE(gen7_blend_op)) {
1892
	if (op >= ARRAY_SIZE(gen7_blend_op)) {
1872
		DBG(("%s: fallback due to unhandled blend op: %d\n",
1893
		DBG(("%s: fallback due to unhandled blend op: %d\n",
1873
		     __FUNCTION__, op));
1894
		     __FUNCTION__, op));
1874
		return false;
1895
		return false;
1875
	}
1896
	}
-
 
1897
 
1876
 
1898
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
1877
	if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
1899
	    !gen7_check_dst_format(format)) {
1878
		uint8_t alu = GXinvalid;
1900
		uint8_t alu = GXinvalid;
1879
 
1901
 
1880
		if (op <= PictOpSrc) {
1902
		if (op <= PictOpSrc) {
1881
			pixel = 0;
1903
			pixel = 0;
1882
			if (op == PictOpClear)
1904
			if (op == PictOpClear)
1883
				alu = GXclear;
1905
				alu = GXclear;
1884
			else if (sna_get_pixel_from_rgba(&pixel,
1906
			else if (sna_get_pixel_from_rgba(&pixel,
1885
							 color->red,
1907
							 color->red,
1886
							 color->green,
1908
							 color->green,
1887
							 color->blue,
1909
							 color->blue,
1888
							 color->alpha,
1910
							 color->alpha,
1889
							 format))
1911
							 format))
1890
				alu = GXcopy;
1912
				alu = GXcopy;
1891
		}
1913
		}
1892
 
1914
 
1893
		if (alu != GXinvalid &&
1915
		if (alu != GXinvalid &&
1894
		    sna_blt_fill_boxes(sna, alu,
1916
		    sna_blt_fill_boxes(sna, alu,
1895
				       dst_bo, dst->drawable.bitsPerPixel,
1917
				       dst_bo, dst->drawable.bitsPerPixel,
1896
				       pixel, box, n))
1918
				       pixel, box, n))
1897
			return true;
1919
			return true;
1898
 
1920
 
1899
		if (!gen7_check_dst_format(format))
1921
		if (!gen7_check_dst_format(format))
1900
			return false;
1922
			return false;
1901
	}
1923
	}
1902
 
1924
 
1903
	if (op == PictOpClear) {
1925
	if (op == PictOpClear) {
1904
		pixel = 0;
1926
		pixel = 0;
1905
		op = PictOpSrc;
1927
		op = PictOpSrc;
1906
	} else if (!sna_get_pixel_from_rgba(&pixel,
1928
	} else if (!sna_get_pixel_from_rgba(&pixel,
1907
					    color->red,
1929
					    color->red,
1908
					    color->green,
1930
					    color->green,
1909
					    color->blue,
1931
					    color->blue,
1910
					    color->alpha,
1932
					    color->alpha,
1911
					    PICT_a8r8g8b8))
1933
					    PICT_a8r8g8b8))
1912
		return false;
1934
		return false;
1913
 
1935
 
1914
	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
1936
	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
1915
	     __FUNCTION__, pixel, n,
1937
	     __FUNCTION__, pixel, n,
1916
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
1938
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
1917
 
1939
 
1918
	tmp.dst.pixmap = dst;
1940
	tmp.dst.pixmap = dst;
1919
	tmp.dst.width  = dst->drawable.width;
1941
	tmp.dst.width  = dst->drawable.width;
1920
	tmp.dst.height = dst->drawable.height;
1942
	tmp.dst.height = dst->drawable.height;
1921
	tmp.dst.format = format;
1943
	tmp.dst.format = format;
1922
	tmp.dst.bo = dst_bo;
1944
	tmp.dst.bo = dst_bo;
1923
	tmp.dst.x = tmp.dst.y = 0;
1945
	tmp.dst.x = tmp.dst.y = 0;
1924
	tmp.damage = NULL;
1946
	tmp.damage = NULL;
1925
 
1947
 
1926
	sna_render_composite_redirect_init(&tmp);
1948
	sna_render_composite_redirect_init(&tmp);
1927
	if (too_large(dst->drawable.width, dst->drawable.height)) {
1949
	if (too_large(dst->drawable.width, dst->drawable.height)) {
1928
		BoxRec extents;
1950
		BoxRec extents;
1929
 
1951
 
1930
		boxes_extents(box, n, &extents);
1952
		boxes_extents(box, n, &extents);
1931
		if (!sna_render_composite_redirect(sna, &tmp,
1953
		if (!sna_render_composite_redirect(sna, &tmp,
1932
						   extents.x1, extents.y1,
1954
						   extents.x1, extents.y1,
1933
						   extents.x2 - extents.x1,
1955
						   extents.x2 - extents.x1,
1934
						   extents.y2 - extents.y1,
1956
						   extents.y2 - extents.y1,
1935
						   n > 1))
1957
						   n > 1))
1936
			return sna_tiling_fill_boxes(sna, op, format, color,
1958
			return sna_tiling_fill_boxes(sna, op, format, color,
1937
						     dst, dst_bo, box, n);
1959
						     dst, dst_bo, box, n);
1938
	}
1960
	}
1939
 
1961
 
1940
	tmp.src.bo = sna_render_get_solid(sna, pixel);
1962
	tmp.src.bo = sna_render_get_solid(sna, pixel);
1941
	tmp.mask.bo = NULL;
1963
	tmp.mask.bo = NULL;
1942
 
1964
 
1943
	tmp.floats_per_vertex = 2;
1965
	tmp.floats_per_vertex = 2;
1944
	tmp.floats_per_rect = 6;
1966
	tmp.floats_per_rect = 6;
1945
	tmp.need_magic_ca_pass = false;
1967
	tmp.need_magic_ca_pass = false;
1946
 
1968
 
1947
	tmp.u.gen7.flags = FILL_FLAGS(op, format);
1969
	tmp.u.gen7.flags = FILL_FLAGS(op, format);
1948
 
1970
 
1949
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
1971
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
1950
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
1972
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
1951
		kgem_submit(&sna->kgem);
1973
		kgem_submit(&sna->kgem);
1952
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
1974
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
-
 
1975
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
-
 
1976
			if (tmp.redirect.real_bo)
-
 
1977
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
-
 
1978
			return false;
-
 
1979
		}
-
 
1980
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1953
	}
1981
	}
1954
 
-
 
1955
	gen7_emit_fill_state(sna, &tmp);
1982
 
-
 
1983
	gen7_align_vertex(sna, &tmp);
1956
	gen7_align_vertex(sna, &tmp);
1984
	gen7_emit_fill_state(sna, &tmp);
1957
 
1985
 
1958
	do {
1986
	do {
1959
		int n_this_time;
1987
		int n_this_time;
1960
		int16_t *v;
1988
		int16_t *v;
1961
 
1989
 
1962
		n_this_time = gen7_get_rectangles(sna, &tmp, n,
1990
		n_this_time = gen7_get_rectangles(sna, &tmp, n,
1963
						  gen7_emit_fill_state);
1991
						  gen7_emit_fill_state);
1964
		n -= n_this_time;
1992
		n -= n_this_time;
1965
 
1993
 
1966
		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
1994
		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
1967
		sna->render.vertex_used += 6 * n_this_time;
1995
		sna->render.vertex_used += 6 * n_this_time;
1968
		assert(sna->render.vertex_used <= sna->render.vertex_size);
1996
		assert(sna->render.vertex_used <= sna->render.vertex_size);
1969
		do {
1997
		do {
1970
			DBG(("	(%d, %d), (%d, %d)\n",
1998
			DBG(("	(%d, %d), (%d, %d)\n",
1971
			     box->x1, box->y1, box->x2, box->y2));
1999
			     box->x1, box->y1, box->x2, box->y2));
1972
 
2000
 
1973
			v[0] = box->x2;
2001
			v[0] = box->x2;
1974
			v[5] = v[1] = box->y2;
2002
			v[5] = v[1] = box->y2;
1975
			v[8] = v[4] = box->x1;
2003
			v[8] = v[4] = box->x1;
1976
			v[9] = box->y1;
2004
			v[9] = box->y1;
1977
			v[2] = v[3]  = v[7]  = 1;
2005
			v[2] = v[3]  = v[7]  = 1;
1978
			v[6] = v[10] = v[11] = 0;
2006
			v[6] = v[10] = v[11] = 0;
1979
			v += 12; box++;
2007
			v += 12; box++;
1980
		} while (--n_this_time);
2008
		} while (--n_this_time);
1981
	} while (n);
2009
	} while (n);
1982
 
2010
 
1983
	gen4_vertex_flush(sna);
2011
	gen4_vertex_flush(sna);
1984
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2012
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
1985
	sna_render_composite_redirect_done(sna, &tmp);
2013
	sna_render_composite_redirect_done(sna, &tmp);
1986
	return true;
2014
	return true;
1987
}
2015
}
1988
#endif
2016
#endif
1989
 
-
 
1990
static void gen7_render_flush(struct sna *sna)
-
 
1991
{
-
 
1992
	gen4_vertex_close(sna);
-
 
1993
 
-
 
1994
	assert(sna->render.vb_id == 0);
-
 
1995
	assert(sna->render.vertex_offset == 0);
-
 
1996
}
-
 
1997
 
-
 
1998
static void
-
 
1999
gen7_render_context_switch(struct kgem *kgem,
-
 
2000
			   int new_mode)
-
 
2001
{
-
 
2002
	if (kgem->nbatch) {
-
 
2003
		DBG(("%s: switch rings %d -> %d\n",
-
 
2004
		     __FUNCTION__, kgem->mode, new_mode));
-
 
2005
		_kgem_submit(kgem);
-
 
2006
	}
-
 
2007
 
-
 
2008
	kgem->ring = new_mode;
-
 
2009
}
-
 
2010
 
-
 
2011
static void
-
 
2012
gen7_render_retire(struct kgem *kgem)
-
 
2013
{
-
 
2014
	struct sna *sna;
-
 
2015
 
-
 
2016
	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
-
 
2017
		kgem->ring = kgem->mode;
-
 
2018
 
-
 
2019
	sna = container_of(kgem, struct sna, kgem);
-
 
2020
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
-
 
2021
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
-
 
2022
		sna->render.vertex_used = 0;
-
 
2023
		sna->render.vertex_index = 0;
-
 
2024
	}
-
 
2025
}
-
 
2026
 
-
 
2027
static void
-
 
2028
gen7_render_expire(struct kgem *kgem)
-
 
2029
{
-
 
2030
	struct sna *sna;
-
 
2031
 
-
 
2032
	sna = container_of(kgem, struct sna, kgem);
-
 
2033
	if (sna->render.vbo && !sna->render.vertex_used) {
-
 
2034
		DBG(("%s: discarding vbo\n", __FUNCTION__));
-
 
2035
		kgem_bo_destroy(kgem, sna->render.vbo);
-
 
2036
		sna->render.vbo = NULL;
-
 
2037
		sna->render.vertices = sna->render.vertex_data;
-
 
2038
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
-
 
2039
		sna->render.vertex_used = 0;
-
 
2040
		sna->render.vertex_index = 0;
-
 
2041
	}
-
 
2042
}
-
 
2043
 
2017
 
2044
static void gen7_render_reset(struct sna *sna)
2018
static void gen7_render_reset(struct sna *sna)
2045
{
2019
{
2046
	sna->render_state.gen7.emit_flush = false;
2020
	sna->render_state.gen7.emit_flush = false;
2047
	sna->render_state.gen7.needs_invariant = true;
2021
	sna->render_state.gen7.needs_invariant = true;
2048
	sna->render_state.gen7.ve_id = 3 << 2;
2022
	sna->render_state.gen7.ve_id = 3 << 2;
2049
	sna->render_state.gen7.last_primitive = -1;
2023
	sna->render_state.gen7.last_primitive = -1;
2050
 
2024
 
2051
	sna->render_state.gen7.num_sf_outputs = 0;
2025
	sna->render_state.gen7.num_sf_outputs = 0;
2052
	sna->render_state.gen7.samplers = -1;
2026
	sna->render_state.gen7.samplers = -1;
2053
	sna->render_state.gen7.blend = -1;
2027
	sna->render_state.gen7.blend = -1;
2054
	sna->render_state.gen7.kernel = -1;
2028
	sna->render_state.gen7.kernel = -1;
2055
	sna->render_state.gen7.drawrect_offset = -1;
2029
	sna->render_state.gen7.drawrect_offset = -1;
2056
	sna->render_state.gen7.drawrect_limit = -1;
2030
	sna->render_state.gen7.drawrect_limit = -1;
2057
	sna->render_state.gen7.surface_table = -1;
2031
	sna->render_state.gen7.surface_table = -1;
-
 
2032
 
-
 
2033
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
-
 
2034
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
-
 
2035
		discard_vbo(sna);
-
 
2036
	}
2058
 
2037
 
2059
	sna->render.vertex_offset = 0;
2038
	sna->render.vertex_offset = 0;
2060
	sna->render.nvertex_reloc = 0;
2039
	sna->render.nvertex_reloc = 0;
2061
	sna->render.vb_id = 0;
2040
	sna->render.vb_id = 0;
2062
}
2041
}
2063
 
2042
 
2064
static void gen7_render_fini(struct sna *sna)
2043
static void gen7_render_fini(struct sna *sna)
2065
{
2044
{
2066
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
2045
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
2067
}
2046
}
2068
 
2047
 
2069
static bool is_gt3(struct sna *sna)
2048
static bool is_gt3(struct sna *sna, int devid)
2070
{
2049
{
2071
	assert(sna->kgem.gen == 075);
2050
	assert(sna->kgem.gen == 075);
2072
	return sna->PciInfo->device_id & 0x20;
2051
	return devid & 0x20;
2073
}
2052
}
2074
 
2053
 
2075
static bool is_gt2(struct sna *sna)
2054
static bool is_gt2(struct sna *sna, int devid)
2076
{
2055
{
2077
	return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20);
2056
	return devid & (is_hsw(sna)? 0x30 : 0x20);
2078
}
2057
}
2079
 
2058
 
2080
static bool is_mobile(struct sna *sna)
2059
static bool is_mobile(struct sna *sna, int devid)
2081
{
2060
{
2082
	return (sna->PciInfo->device_id & 0xf) == 0x6;
2061
	return (devid & 0xf) == 0x6;
2083
}
2062
}
2084
 
2063
 
2085
static bool gen7_render_setup(struct sna *sna)
2064
static bool gen7_render_setup(struct sna *sna, int devid)
2086
{
2065
{
2087
    struct gen7_render_state *state = &sna->render_state.gen7;
2066
    struct gen7_render_state *state = &sna->render_state.gen7;
2088
    struct sna_static_stream general;
2067
    struct sna_static_stream general;
2089
    struct gen7_sampler_state *ss;
2068
    struct gen7_sampler_state *ss;
2090
    int i, j, k, l, m;
2069
    int i, j, k, l, m;
2091
 
2070
 
2092
	if (is_ivb(sna)) {
2071
	if (is_ivb(sna)) {
2093
        state->info = &ivb_gt_info;
2072
        state->info = &ivb_gt_info;
2094
		if (sna->PciInfo->device_id & 0xf) {
2073
		if (devid & 0xf) {
2095
            state->info = &ivb_gt1_info;
2074
            state->info = &ivb_gt1_info;
2096
            if (is_gt2(sna))
2075
			if (is_gt2(sna, devid))
2097
                state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
2076
                state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
2098
        }
2077
        }
2099
	} else if (is_byt(sna)) {
2078
	} else if (is_byt(sna)) {
2100
		state->info = &byt_gt_info;
2079
		state->info = &byt_gt_info;
2101
	} else if (is_hsw(sna)) {
2080
	} else if (is_hsw(sna)) {
2102
        state->info = &hsw_gt_info;
2081
        state->info = &hsw_gt_info;
2103
		if (sna->PciInfo->device_id & 0xf) {
2082
		if (devid & 0xf) {
2104
			if (is_gt3(sna))
2083
			if (is_gt3(sna, devid))
2105
				state->info = &hsw_gt3_info;
2084
				state->info = &hsw_gt3_info;
2106
			else if (is_gt2(sna))
2085
			else if (is_gt2(sna, devid))
2107
				state->info = &hsw_gt2_info;
2086
				state->info = &hsw_gt2_info;
2108
			else
2087
			else
2109
            state->info = &hsw_gt1_info;
2088
            state->info = &hsw_gt1_info;
2110
        }
2089
        }
2111
    } else
2090
    } else
2112
        return false;
2091
        return false;
-
 
2092
 
-
 
2093
	state->gt = state->info->gt;
2113
 
2094
 
2114
    sna_static_stream_init(&general);
2095
    sna_static_stream_init(&general);
2115
 
2096
 
2116
    /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
2097
    /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
2117
     * dumps, you know it points to zero.
2098
     * dumps, you know it points to zero.
2118
     */
2099
     */
2119
    null_create(&general);
2100
    null_create(&general);
2120
 
2101
 
2121
    for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
2102
    for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
2122
        if (wm_kernels[m].size) {
2103
        if (wm_kernels[m].size) {
2123
            state->wm_kernel[m][1] =
2104
            state->wm_kernel[m][1] =
2124
                sna_static_stream_add(&general,
2105
                sna_static_stream_add(&general,
2125
                              wm_kernels[m].data,
2106
                              wm_kernels[m].data,
2126
                              wm_kernels[m].size,
2107
                              wm_kernels[m].size,
2127
                              64);
2108
                              64);
2128
        } else {
2109
        } else {
2129
            if (USE_8_PIXEL_DISPATCH) {
2110
            if (USE_8_PIXEL_DISPATCH) {
2130
                state->wm_kernel[m][0] =
2111
                state->wm_kernel[m][0] =
2131
                    sna_static_stream_compile_wm(sna, &general,
2112
                    sna_static_stream_compile_wm(sna, &general,
2132
                                     wm_kernels[m].data, 8);
2113
                                     wm_kernels[m].data, 8);
2133
            }
2114
            }
2134
 
2115
 
2135
            if (USE_16_PIXEL_DISPATCH) {
2116
            if (USE_16_PIXEL_DISPATCH) {
2136
                state->wm_kernel[m][1] =
2117
                state->wm_kernel[m][1] =
2137
                    sna_static_stream_compile_wm(sna, &general,
2118
                    sna_static_stream_compile_wm(sna, &general,
2138
                                     wm_kernels[m].data, 16);
2119
                                     wm_kernels[m].data, 16);
2139
            }
2120
            }
2140
 
2121
 
2141
            if (USE_32_PIXEL_DISPATCH) {
2122
            if (USE_32_PIXEL_DISPATCH) {
2142
                state->wm_kernel[m][2] =
2123
                state->wm_kernel[m][2] =
2143
                    sna_static_stream_compile_wm(sna, &general,
2124
                    sna_static_stream_compile_wm(sna, &general,
2144
                                     wm_kernels[m].data, 32);
2125
                                     wm_kernels[m].data, 32);
2145
            }
2126
            }
2146
        }
2127
        }
2147
        assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
2128
        assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
2148
    }
2129
    }
2149
 
2130
 
2150
    ss = sna_static_stream_map(&general,
2131
    ss = sna_static_stream_map(&general,
2151
                   2 * sizeof(*ss) *
2132
                   2 * sizeof(*ss) *
2152
                   (2 +
2133
                   (2 +
2153
                    FILTER_COUNT * EXTEND_COUNT *
2134
                    FILTER_COUNT * EXTEND_COUNT *
2154
                    FILTER_COUNT * EXTEND_COUNT),
2135
                    FILTER_COUNT * EXTEND_COUNT),
2155
                   32);
2136
                   32);
2156
    state->wm_state = sna_static_stream_offsetof(&general, ss);
2137
    state->wm_state = sna_static_stream_offsetof(&general, ss);
2157
    sampler_copy_init(ss); ss += 2;
2138
    sampler_copy_init(ss); ss += 2;
2158
    sampler_fill_init(ss); ss += 2;
2139
    sampler_fill_init(ss); ss += 2;
2159
    for (i = 0; i < FILTER_COUNT; i++) {
2140
    for (i = 0; i < FILTER_COUNT; i++) {
2160
        for (j = 0; j < EXTEND_COUNT; j++) {
2141
        for (j = 0; j < EXTEND_COUNT; j++) {
2161
            for (k = 0; k < FILTER_COUNT; k++) {
2142
            for (k = 0; k < FILTER_COUNT; k++) {
2162
                for (l = 0; l < EXTEND_COUNT; l++) {
2143
                for (l = 0; l < EXTEND_COUNT; l++) {
2163
                    sampler_state_init(ss++, i, j);
2144
                    sampler_state_init(ss++, i, j);
2164
                    sampler_state_init(ss++, k, l);
2145
                    sampler_state_init(ss++, k, l);
2165
                }
2146
                }
2166
            }
2147
            }
2167
        }
2148
        }
2168
    }
2149
    }
2169
 
2150
 
2170
    state->cc_blend = gen7_composite_create_blend_state(&general);
2151
    state->cc_blend = gen7_composite_create_blend_state(&general);
2171
 
2152
 
2172
    state->general_bo = sna_static_stream_fini(sna, &general);
2153
    state->general_bo = sna_static_stream_fini(sna, &general);
2173
    return state->general_bo != NULL;
2154
    return state->general_bo != NULL;
2174
}
2155
}
2175
 
2156
 
2176
const char *gen7_render_init(struct sna *sna, const char *backend)
2157
const char *gen7_render_init(struct sna *sna, const char *backend)
2177
{
2158
{
-
 
2159
	int devid = intel_get_device_id(sna);
-
 
2160
 
2178
    if (!gen7_render_setup(sna))
2161
	if (!gen7_render_setup(sna, devid))
2179
		return backend;
2162
		return backend;
2180
 
2163
 
2181
    sna->kgem.context_switch = gen7_render_context_switch;
2164
	sna->kgem.context_switch = gen6_render_context_switch;
2182
    sna->kgem.retire = gen7_render_retire;
2165
	sna->kgem.retire = gen6_render_retire;
2183
    sna->kgem.expire = gen7_render_expire;
2166
	sna->kgem.expire = gen4_render_expire;
2184
 
2167
 
2185
#if 0
2168
#if 0
2186
#if !NO_COMPOSITE
2169
#if !NO_COMPOSITE
2187
	sna->render.composite = gen7_render_composite;
2170
	sna->render.composite = gen7_render_composite;
2188
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
2171
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
2189
#endif
2172
#endif
2190
#if !NO_COMPOSITE_SPANS
2173
#if !NO_COMPOSITE_SPANS
2191
	sna->render.check_composite_spans = gen7_check_composite_spans;
2174
	sna->render.check_composite_spans = gen7_check_composite_spans;
2192
	sna->render.composite_spans = gen7_render_composite_spans;
2175
	sna->render.composite_spans = gen7_render_composite_spans;
2193
	if (is_mobile(sna) || is_gt2(sna) || is_byt(sna))
2176
	if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
2194
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
2177
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
2195
#endif
2178
#endif
2196
	sna->render.video = gen7_render_video;
2179
	sna->render.video = gen7_render_video;
2197
 
2180
 
2198
#if !NO_COPY_BOXES
2181
#if !NO_COPY_BOXES
2199
	sna->render.copy_boxes = gen7_render_copy_boxes;
2182
	sna->render.copy_boxes = gen7_render_copy_boxes;
2200
#endif
2183
#endif
2201
#if !NO_COPY
2184
#if !NO_COPY
2202
	sna->render.copy = gen7_render_copy;
2185
	sna->render.copy = gen7_render_copy;
2203
#endif
2186
#endif
2204
 
2187
 
2205
#if !NO_FILL_BOXES
2188
#if !NO_FILL_BOXES
2206
	sna->render.fill_boxes = gen7_render_fill_boxes;
2189
	sna->render.fill_boxes = gen7_render_fill_boxes;
2207
#endif
2190
#endif
2208
#if !NO_FILL
2191
#if !NO_FILL
2209
	sna->render.fill = gen7_render_fill;
2192
	sna->render.fill = gen7_render_fill;
2210
#endif
2193
#endif
2211
#if !NO_FILL_ONE
2194
#if !NO_FILL_ONE
2212
	sna->render.fill_one = gen7_render_fill_one;
2195
	sna->render.fill_one = gen7_render_fill_one;
2213
#endif
2196
#endif
2214
#if !NO_FILL_CLEAR
2197
#if !NO_FILL_CLEAR
2215
	sna->render.clear = gen7_render_clear;
2198
	sna->render.clear = gen7_render_clear;
2216
#endif
2199
#endif
2217
#endif
2200
#endif
2218
 
2201
 
2219
    sna->render.blit_tex = gen7_blit_tex;
2202
    sna->render.blit_tex = gen7_blit_tex;
2220
    sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
2203
    sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
2221
 
2204
 
2222
    sna->render.flush = gen7_render_flush;
2205
	sna->render.flush = gen4_render_flush;
2223
    sna->render.reset = gen7_render_reset;
2206
    sna->render.reset = gen7_render_reset;
2224
    sna->render.fini = gen7_render_fini;
2207
    sna->render.fini = gen7_render_fini;
2225
 
2208
 
2226
    sna->render.max_3d_size = GEN7_MAX_SIZE;
2209
    sna->render.max_3d_size = GEN7_MAX_SIZE;
2227
    sna->render.max_3d_pitch = 1 << 18;
2210
    sna->render.max_3d_pitch = 1 << 18;
2228
	return sna->render_state.gen7.info->name;
2211
	return sna->render_state.gen7.info->name;
2229
}
2212
}
2230
 
2213
 
2231
 
2214
 
2232
static bool
2215
static bool
2233
gen7_blit_tex(struct sna *sna,
2216
gen7_blit_tex(struct sna *sna,
2234
              uint8_t op, bool scale,
2217
              uint8_t op, bool scale,
2235
		      PixmapPtr src, struct kgem_bo *src_bo,
2218
		      PixmapPtr src, struct kgem_bo *src_bo,
2236
		      PixmapPtr mask,struct kgem_bo *mask_bo,
2219
		      PixmapPtr mask,struct kgem_bo *mask_bo,
2237
		      PixmapPtr dst, struct kgem_bo *dst_bo,
2220
		      PixmapPtr dst, struct kgem_bo *dst_bo,
2238
              int32_t src_x, int32_t src_y,
2221
              int32_t src_x, int32_t src_y,
2239
              int32_t msk_x, int32_t msk_y,
2222
              int32_t msk_x, int32_t msk_y,
2240
              int32_t dst_x, int32_t dst_y,
2223
              int32_t dst_x, int32_t dst_y,
2241
              int32_t width, int32_t height,
2224
              int32_t width, int32_t height,
2242
              struct sna_composite_op *tmp)
2225
              struct sna_composite_op *tmp)
2243
{
2226
{
2244
 
2227
 
2245
 
2228
 
2246
    tmp->op = PictOpSrc;
2229
    tmp->op = PictOpSrc;
2247
 
2230
 
2248
    tmp->dst.pixmap = dst;
2231
    tmp->dst.pixmap = dst;
2249
    tmp->dst.bo     = dst_bo;
2232
    tmp->dst.bo     = dst_bo;
2250
    tmp->dst.width  = dst->drawable.width;
2233
    tmp->dst.width  = dst->drawable.width;
2251
    tmp->dst.height = dst->drawable.height;
2234
    tmp->dst.height = dst->drawable.height;
2252
    tmp->dst.format = PICT_a8r8g8b8;
2235
    tmp->dst.format = PICT_a8r8g8b8;
2253
 
2236
 
2254
 
2237
 
2255
	tmp->src.repeat = RepeatNone;
2238
	tmp->src.repeat = RepeatNone;
2256
	tmp->src.filter = PictFilterNearest;
2239
	tmp->src.filter = PictFilterNearest;
2257
    tmp->src.is_affine = true;
2240
    tmp->src.is_affine = true;
2258
 
2241
 
2259
    tmp->src.bo = src_bo;
2242
    tmp->src.bo = src_bo;
2260
	tmp->src.pict_format = PICT_x8r8g8b8;
2243
	tmp->src.pict_format = PICT_x8r8g8b8;
2261
    tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
2244
    tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
2262
    tmp->src.width  = src->drawable.width;
2245
    tmp->src.width  = src->drawable.width;
2263
    tmp->src.height = src->drawable.height;
2246
    tmp->src.height = src->drawable.height;
2264
 
2247
 
2265
 
2248
 
2266
	tmp->is_affine = tmp->src.is_affine;
2249
	tmp->is_affine = tmp->src.is_affine;
2267
	tmp->has_component_alpha = false;
2250
	tmp->has_component_alpha = false;
2268
	tmp->need_magic_ca_pass = false;
2251
	tmp->need_magic_ca_pass = false;
2269
 
2252
 
2270
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
2253
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
2271
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
2254
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
2272
    tmp->mask.is_affine = true;
2255
    tmp->mask.is_affine = true;
2273
 
2256
 
2274
    tmp->mask.bo = mask_bo;
2257
    tmp->mask.bo = mask_bo;
2275
    tmp->mask.pict_format = PIXMAN_a8;
2258
    tmp->mask.pict_format = PIXMAN_a8;
2276
    tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
2259
    tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
2277
    tmp->mask.width  = mask->drawable.width;
2260
    tmp->mask.width  = mask->drawable.width;
2278
    tmp->mask.height = mask->drawable.height;
2261
    tmp->mask.height = mask->drawable.height;
2279
 
2262
 
2280
    if( scale )
2263
    if( scale )
2281
    {
2264
    {
2282
        tmp->src.scale[0] = 1.f/width;
2265
        tmp->src.scale[0] = 1.f/width;
2283
        tmp->src.scale[1] = 1.f/height;
2266
        tmp->src.scale[1] = 1.f/height;
2284
    }
2267
    }
2285
    else
2268
    else
2286
    {
2269
    {
2287
        tmp->src.scale[0] = 1.f/src->drawable.width;
2270
        tmp->src.scale[0] = 1.f/src->drawable.width;
2288
        tmp->src.scale[1] = 1.f/src->drawable.height;
2271
        tmp->src.scale[1] = 1.f/src->drawable.height;
2289
    }
2272
    }
2290
 
2273
 
2291
    tmp->mask.scale[0] = 1.f/mask->drawable.width;
2274
    tmp->mask.scale[0] = 1.f/mask->drawable.width;
2292
    tmp->mask.scale[1] = 1.f/mask->drawable.height;
2275
    tmp->mask.scale[1] = 1.f/mask->drawable.height;
2293
 
2276
 
2294
 
2277
 
2295
 
2278
 
2296
	tmp->u.gen7.flags =
2279
	tmp->u.gen7.flags =
2297
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
2280
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
2298
					      tmp->src.repeat,
2281
					      tmp->src.repeat,
2299
					      tmp->mask.filter,
2282
					      tmp->mask.filter,
2300
					      tmp->mask.repeat),
2283
					      tmp->mask.repeat),
2301
			       gen7_get_blend(tmp->op,
2284
			       gen7_get_blend(tmp->op,
2302
					      tmp->has_component_alpha,
2285
					      tmp->has_component_alpha,
2303
					      tmp->dst.format),
2286
					      tmp->dst.format),
2304
/*			       gen7_choose_composite_kernel(tmp->op,
2287
/*			       gen7_choose_composite_kernel(tmp->op,
2305
							    tmp->mask.bo != NULL,
2288
							    tmp->mask.bo != NULL,
2306
							    tmp->has_component_alpha,
2289
							    tmp->has_component_alpha,
2307
							    tmp->is_affine), */
2290
							    tmp->is_affine), */
2308
                   GEN7_WM_KERNEL_MASK,
2291
                   GEN7_WM_KERNEL_MASK,
2309
			       gen4_choose_composite_emitter(sna, tmp));
2292
			       gen4_choose_composite_emitter(sna, tmp));
2310
 
2293
 
2311
	tmp->blt   = gen7_render_composite_blt;
2294
	tmp->blt   = gen7_render_composite_blt;
2312
//	tmp->box   = gen7_render_composite_box;
2295
//	tmp->box   = gen7_render_composite_box;
2313
	tmp->done  = gen7_render_composite_done;
2296
	tmp->done  = gen7_render_composite_done;
2314
 
2297
 
2315
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
2298
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
2316
	if (!kgem_check_bo(&sna->kgem,
2299
	if (!kgem_check_bo(&sna->kgem,
2317
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2300
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2318
			   NULL)) {
2301
			   NULL)) {
2319
		kgem_submit(&sna->kgem);
2302
		kgem_submit(&sna->kgem);
2320
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2303
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2321
	}
2304
	}
2322
 
-
 
2323
	gen7_emit_composite_state(sna, tmp);
2305
 
-
 
2306
	gen7_align_vertex(sna, tmp);
2324
	gen7_align_vertex(sna, tmp);
2307
	gen7_emit_composite_state(sna, tmp);
2325
	return true;
2308
	return true;
2326
}
2309
}