Subversion Repositories Kolibri OS

Rev

Rev 4315 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4315 Serge 1
/*
2
 * Copyright © 2006,2008 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 *
24
 * Authors:
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
27
 *    Carl Worth 
28
 *    Keith Packard 
29
 *
30
 */
31
 
32
#ifdef HAVE_CONFIG_H
33
#include "config.h"
34
#endif
35
 
36
#include 
37
#include 
38
#include 
39
 
40
#include 
41
//#include "xf86.h"
42
#include "intel.h"
43
#include "i830_reg.h"
44
#include "i965_reg.h"
45
 
46
/* bring in brw structs */
47
#include "brw_defines.h"
48
#include "brw_structs.h"
49
 
50
#define intel_debug_fallback printf
51
 
4348 Serge 52
#define DBG printf
4315 Serge 53
 
54
// refer vol2, 3d rasterization 3.8.1
55
 
56
/* defined in brw_defines.h */
57
static const struct blendinfo {
58
	Bool dst_alpha;
59
	Bool src_alpha;
60
	uint32_t src_blend;
61
	uint32_t dst_blend;
62
} i965_blend_op[] = {
63
	/* Clear */
64
	{0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO},
65
	/* Src */
66
	{0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO},
67
	/* Dst */
68
	{0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE},
69
	/* Over */
70
	{0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA},
71
	/* OverReverse */
72
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
73
	/* In */
74
	{1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
75
	/* InReverse */
76
	{0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA},
77
	/* Out */
78
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
79
	/* OutReverse */
80
	{0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA},
81
	/* Atop */
82
	{1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
83
	/* AtopReverse */
84
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
85
	/* Xor */
86
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
87
	/* Add */
88
	{0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE},
89
};
90
 
91
/**
92
 * Highest-valued BLENDFACTOR used in i965_blend_op.
93
 *
94
 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
95
 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
96
 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
97
 */
98
#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)
99
 
100
/* FIXME: surface format defined in brw_defines.h, shared Sampling engine
101
 * 1.7.2
102
 */
103
static const struct formatinfo {
104
	int fmt;
105
	uint32_t card_fmt;
106
} i965_tex_formats[] = {
107
	{PICT_a8, BRW_SURFACEFORMAT_A8_UNORM},
108
	{PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM},
109
	{PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM},
110
	{PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM},
111
	{PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM},
112
	{PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM},
113
	{PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM},
114
	{PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM},
115
#if XORG_VERSION_CURRENT >= 10699900
116
	{PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM},
117
	{PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM},
118
	{PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM},
119
	{PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM},
120
#endif
121
	{PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM},
122
};
123
 
124
/* Computes the hardware source/destination blend factors for a composite
 * op, patching them for a missing destination alpha channel and for
 * component-alpha masks. */
static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format,
				uint32_t * sblend, uint32_t * dblend)
{
	uint32_t src = i965_blend_op[op].src_blend;
	uint32_t dst = i965_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that we'll
	 * treat it as always 1. */
	if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) {
		if (src == BRW_BLENDFACTOR_DST_ALPHA)
			src = BRW_BLENDFACTOR_ONE;
		else if (src == BRW_BLENDFACTOR_INV_DST_ALPHA)
			src = BRW_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a case
	 * where the source blend factor is 0, and the source blend value is
	 * the mask channels multiplied by the source picture's alpha. */
	if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
	    i965_blend_op[op].src_alpha) {
		if (dst == BRW_BLENDFACTOR_SRC_ALPHA)
			dst = BRW_BLENDFACTOR_SRC_COLOR;
		else if (dst == BRW_BLENDFACTOR_INV_SRC_ALPHA)
			dst = BRW_BLENDFACTOR_INV_SRC_COLOR;
	}

	*sblend = src;
	*dblend = dst;
}
155
 
156
/* Maps a destination picture format to the render-target surface format,
 * or (uint32_t)-1 when the format cannot be rendered to. */
static uint32_t i965_get_dest_format(PicturePtr dest_picture)
{
	uint32_t fmt = dest_picture->format;

	if (fmt == PICT_a8r8g8b8 || fmt == PICT_x8r8g8b8)
		return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
	if (fmt == PICT_a8b8g8r8 || fmt == PICT_x8b8g8r8)
		return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= 10699900
	if (fmt == PICT_a2r10g10b10 || fmt == PICT_x2r10g10b10)
		return BRW_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	if (fmt == PICT_r5g6b5)
		return BRW_SURFACEFORMAT_B5G6R5_UNORM;
	if (fmt == PICT_x1r5g5b5 || fmt == PICT_a1r5g5b5)
		return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
	if (fmt == PICT_a8)
		return BRW_SURFACEFORMAT_A8_UNORM;
	if (fmt == PICT_a4r4g4b4 || fmt == PICT_x4r4g4b4)
		return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;

	return -1;	/* unsupported render target format */
}
184
 
185
Bool
186
i965_check_composite(int op,
187
		     PicturePtr source_picture,
188
		     PicturePtr mask_picture,
189
		     PicturePtr dest_picture,
190
		     int width, int height)
191
{
192
	/* Check for unsupported compositing operations. */
193
	if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) {
194
		intel_debug_fallback("Unsupported Composite op 0x%x\n", op);
195
		return FALSE;
196
	}
197
 
198
	if (mask_picture && mask_picture->componentAlpha &&
199
	    PICT_FORMAT_RGB(mask_picture->format)) {
200
		/* Check if it's component alpha that relies on a source alpha and on
201
		 * the source value.  We can only get one of those into the single
202
		 * source value that we get to blend with.
203
		 */
204
		if (i965_blend_op[op].src_alpha &&
205
		    (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) {
206
			intel_debug_fallback("Component alpha not supported "
207
					     "with source alpha and source "
208
					     "value blending.\n");
209
			return FALSE;
210
		}
211
	}
212
 
213
	if (i965_get_dest_format(dest_picture) == -1) {
214
		intel_debug_fallback("Usupported Color buffer format 0x%x\n",
215
				     (int)dest_picture->format);
216
		return FALSE;
217
	}
218
 
219
	return TRUE;
220
}
221
 
222
Bool
223
i965_check_composite_texture(ScreenPtr screen, PicturePtr picture)
224
{
225
	if (picture->repeatType > RepeatReflect) {
226
		intel_debug_fallback("extended repeat (%d) not supported\n",
227
				     picture->repeatType);
228
		return FALSE;
229
	}
230
 
231
	if (picture->filter != PictFilterNearest &&
232
	    picture->filter != PictFilterBilinear) {
233
		intel_debug_fallback("Unsupported filter 0x%x\n", picture->filter);
234
		return FALSE;
235
	}
236
 
237
	if (picture->pDrawable) {
238
		int w, h, i;
239
 
240
		w = picture->pDrawable->width;
241
		h = picture->pDrawable->height;
242
		if ((w > 8192) || (h > 8192)) {
243
			intel_debug_fallback( "Picture w/h too large (%dx%d)\n",w, h);
244
			return FALSE;
245
		}
246
 
247
		for (i = 0;
248
		     i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
249
		     i++) {
250
			if (i965_tex_formats[i].fmt == picture->format)
251
				break;
252
		}
253
		if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]))
254
		{
255
			intel_debug_fallback("Unsupported picture format "
256
					     "0x%x\n", (int)picture->format);
257
			return FALSE;
258
		}
259
 
260
		return TRUE;
261
	}
262
 
263
	return FALSE;
264
}
265
 
266
 
267
/* Number of 16-register GRF blocks (minus one) a kernel needs, as encoded
 * in the fixed-function unit state. */
#define BRW_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     0
#define URB_CS_ENTRIES	      0

#define URB_VS_ENTRY_SIZE     1	/* each 512-bit row */
#define URB_VS_ENTRIES	      8	/* we need at least 8 entries */

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES	      0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES	      1

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */
#define SF_KERNEL_NUM_GRF  16
#define SF_MAX_THREADS	   2
294
 
295
/* Pre-compiled gen4 EU kernels, assembled offline into .g4b include files
 * (one 128-bit instruction per row). */
static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
};

static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"
};

/* ps kernels */
#define PS_KERNEL_NUM_GRF   32
#define PS_MAX_THREADS	    48

/* Source only, affine transform. */
static const uint32_t ps_kernel_nomask_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

/* Source only, projective transform. */
static const uint32_t ps_kernel_nomask_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

/* Source + component-alpha mask, affine. */
static const uint32_t ps_kernel_maskca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

/* Source + component-alpha mask, projective. */
static const uint32_t ps_kernel_maskca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

/* Source alpha + component-alpha mask, affine. */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

/* Source alpha + component-alpha mask, projective. */
static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

/* Source + non-component-alpha mask, affine. */
static const uint32_t ps_kernel_masknoca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

/* Source + non-component-alpha mask, projective. */
static const uint32_t ps_kernel_masknoca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};
380
 
381
/* new programs for Ironlake (gen5): same kernel set, rebuilt for the gen5
 * instruction encoding (.g4b.gen5 blobs). */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "exa_sf.g4b.gen5"
};

static const uint32_t sf_kernel_mask_static_gen5[][4] = {
#include "exa_sf_mask.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};
463
 
464
/* programs for GEN6 (Sandybridge, .g6b blobs; the xy setup stage is no
 * longer needed on gen6). */
static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_argb.g6b"
/* Was "exa_wm_ca.g4b.gen5" (a gen5-encoded blob) — inconsistent with the
 * affine variant above, which uses the gen6 build of the same stage. */
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};
530
 
531
/* programs for GEN7 (Ivybridge; sampling stages are .g7b, while the
 * blend-combining stages reuse the gen6 builds). */
static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_argb.g7b"
/* Was "exa_wm_ca.g4b.gen5" (a gen5-encoded blob) — inconsistent with the
 * affine variant above, which uses the gen6 build like every other gen7
 * combining stage here. */
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_a.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_a.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};
597
 
598
 
599
/* Sampler filter selector; indexes the sampler/WM state caches. */
typedef enum {
	SS_INVALID_FILTER = -1,
	SS_FILTER_NEAREST,
	SS_FILTER_BILINEAR,
	FILTER_COUNT,
} sampler_state_filter_t;

/* Sampler repeat/extend selector; indexes the sampler/WM state caches. */
typedef enum {
	SS_INVALID_EXTEND = -1,
	SS_EXTEND_NONE,
	SS_EXTEND_REPEAT,
	SS_EXTEND_PAD,
	SS_EXTEND_REFLECT,
	EXTEND_COUNT,
} sampler_state_extend_t;

/* Pixel-shader kernel selector; indexes the wm_kernels_gen* tables. */
typedef enum {
	WM_KERNEL_NOMASK_AFFINE,
	WM_KERNEL_NOMASK_PROJECTIVE,
	WM_KERNEL_MASKCA_AFFINE,
	WM_KERNEL_MASKCA_PROJECTIVE,
	WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	WM_KERNEL_MASKNOCA_AFFINE,
	WM_KERNEL_MASKNOCA_PROJECTIVE,
	KERNEL_COUNT
} wm_kernel_t;
626
 
627
#define KERNEL(kernel_enum, kernel, masked) \
628
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
629
struct wm_kernel_info {
630
	const void *data;
631
	unsigned int size;
632
	Bool has_mask;
633
};
634
 
635
static const struct wm_kernel_info wm_kernels_gen4[] = {
636
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
637
	       ps_kernel_nomask_affine_static, FALSE),
638
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
639
	       ps_kernel_nomask_projective_static, FALSE),
640
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
641
	       ps_kernel_maskca_affine_static, TRUE),
642
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
643
	       ps_kernel_maskca_projective_static, TRUE),
644
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
645
	       ps_kernel_maskca_srcalpha_affine_static, TRUE),
646
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
647
	       ps_kernel_maskca_srcalpha_projective_static, TRUE),
648
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
649
	       ps_kernel_masknoca_affine_static, TRUE),
650
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
651
	       ps_kernel_masknoca_projective_static, TRUE),
652
};
653
 
654
static const struct wm_kernel_info wm_kernels_gen5[] = {
655
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
656
	       ps_kernel_nomask_affine_static_gen5, FALSE),
657
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
658
	       ps_kernel_nomask_projective_static_gen5, FALSE),
659
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
660
	       ps_kernel_maskca_affine_static_gen5, TRUE),
661
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
662
	       ps_kernel_maskca_projective_static_gen5, TRUE),
663
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
664
	       ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
665
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
666
	       ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
667
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
668
	       ps_kernel_masknoca_affine_static_gen5, TRUE),
669
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
670
	       ps_kernel_masknoca_projective_static_gen5, TRUE),
671
};
672
 
673
static const struct wm_kernel_info wm_kernels_gen6[] = {
674
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
675
	       ps_kernel_nomask_affine_static_gen6, FALSE),
676
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
677
	       ps_kernel_nomask_projective_static_gen6, FALSE),
678
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
679
	       ps_kernel_maskca_affine_static_gen6, TRUE),
680
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
681
	       ps_kernel_maskca_projective_static_gen6, TRUE),
682
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
683
	       ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE),
684
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
685
	       ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE),
686
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
687
	       ps_kernel_masknoca_affine_static_gen6, TRUE),
688
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
689
	       ps_kernel_masknoca_projective_static_gen6, TRUE),
690
};
691
 
692
static const struct wm_kernel_info wm_kernels_gen7[] = {
693
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
694
	       ps_kernel_nomask_affine_static_gen7, FALSE),
695
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
696
	       ps_kernel_nomask_projective_static_gen7, FALSE),
697
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
698
	       ps_kernel_maskca_affine_static_gen7, TRUE),
699
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
700
	       ps_kernel_maskca_projective_static_gen7, TRUE),
701
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
702
	       ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE),
703
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
704
	       ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE),
705
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
706
	       ps_kernel_masknoca_affine_static_gen7, TRUE),
707
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
708
	       ps_kernel_masknoca_projective_static_gen7, TRUE),
709
};
710
 
711
#undef KERNEL
712
 
713
typedef struct _brw_cc_unit_state_padded {
714
	struct brw_cc_unit_state state;
715
	char pad[64 - sizeof(struct brw_cc_unit_state)];
716
} brw_cc_unit_state_padded;
717
 
718
#ifndef MAX
719
#define MAX(a, b) ((a) > (b) ? (a) : (b))
720
#endif
721
#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32)
722
 
723
struct gen4_cc_unit_state {
724
	/* Index by [src_blend][dst_blend] */
725
	brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT];
726
};
727
 
728
typedef struct gen4_composite_op {
729
	int op;
730
	sampler_state_filter_t src_filter;
731
	sampler_state_filter_t mask_filter;
732
	sampler_state_extend_t src_extend;
733
	sampler_state_extend_t mask_extend;
734
	Bool is_affine;
735
	wm_kernel_t wm_kernel;
736
	int vertex_id;
737
} gen4_composite_op;
738
 
739
/** Private data for gen4 render accel implementation. */
740
struct gen4_render_state {
741
	drm_intel_bo *vs_state_bo;
742
	drm_intel_bo *sf_state_bo;
743
	drm_intel_bo *sf_mask_state_bo;
744
	drm_intel_bo *cc_state_bo;
745
	drm_intel_bo *wm_state_bo[KERNEL_COUNT]
746
	    [FILTER_COUNT] [EXTEND_COUNT]
747
	    [FILTER_COUNT] [EXTEND_COUNT];
748
	drm_intel_bo *wm_kernel_bo[KERNEL_COUNT];
749
 
750
	drm_intel_bo *cc_vp_bo;
751
	drm_intel_bo *gen6_blend_bo;
752
	drm_intel_bo *gen6_depth_stencil_bo;
753
	drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT]
754
	    [EXTEND_COUNT]
755
	    [FILTER_COUNT]
756
	    [EXTEND_COUNT];
757
	gen4_composite_op composite_op;
758
};
759
 
760
static void gen6_emit_composite_state(struct intel_screen_private *intel);
761
static void gen6_render_state_init();
762
 
763
/**
764
 * Sets up the SF state pointing at an SF kernel.
765
 *
766
 * The SF kernel does coord interp: for each attribute,
767
 * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
768
 * back to SF which then hands pixels off to WM.
769
 */
770
/**
 * Sets up the SF state pointing at an SF kernel.
 *
 * The SF kernel does coord interp: for each attribute, calculate dA/dx and
 * dA/dy.  Hand these interpolation coefficients back to SF which then
 * hands pixels off to WM.  Returns a newly allocated BO holding the state.
 */
static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel,
					  drm_intel_bo * kernel_bo)
{
	struct brw_sf_unit_state *sf;
	drm_intel_bo *bo;
	int ret;

	bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state",
				sizeof(*sf), 4096);
	assert(bo);

	ret = drm_intel_bo_map(bo, TRUE);
	assert(ret == 0);
	(void)ret;	/* only read by assert() */

	sf = memset(bo->virtual, 0, sizeof(*sf));
	sf->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer =
	    intel_emit_reloc(bo,
			     offsetof(struct brw_sf_unit_state, thread0),
			     kernel_bo, sf->thread0.grf_reg_count << 1,
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
	sf->sf1.single_program_flow = 1;
	sf->sf1.binding_table_entry_count = 0;
	sf->sf1.thread_priority = 0;
	sf->sf1.floating_point_mode = 0;	/* Mesa does this */
	sf->sf1.illegal_op_exception_enable = 1;
	sf->sf1.mask_stack_exception_enable = 1;
	sf->sf1.sw_exception_enable = 1;
	sf->thread2.per_thread_scratch_space = 0;
	/* scratch space is not used in our kernel */
	sf->thread2.scratch_space_base_pointer = 0;
	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;
	sf->thread4.max_threads = SF_MAX_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->sf5.viewport_transform = FALSE;	/* skip viewport */
	sf->sf6.cull_mode = BRW_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf7.trifan_pv = 2;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	drm_intel_bo_unmap(bo);

	return bo;
}
822
 
823
/* Allocates a BO holding the sampler border color, which is always
 * transparent black (the RepeatNone semantics of the Render extension). */
static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel)
{
	struct brw_sampler_legacy_border_color border = {
		.color = { 0, 0, 0, 0 },	/* R, G, B, A */
	};

	return intel_bo_alloc_for_data(intel, &border, sizeof(border),
				       "gen4 render sampler border color");
}
839
 
840
static void
841
gen4_sampler_state_init(drm_intel_bo * sampler_state_bo,
842
		   struct brw_sampler_state *sampler_state,
843
		   sampler_state_filter_t filter,
844
		   sampler_state_extend_t extend,
845
		   drm_intel_bo * border_color_bo)
846
{
847
	uint32_t sampler_state_offset;
848
 
849
	sampler_state_offset = (char *)sampler_state -
850
	    (char *)sampler_state_bo->virtual;
851
 
852
	/* PS kernel use this sampler */
853
	memset(sampler_state, 0, sizeof(*sampler_state));
854
 
855
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
856
 
857
	/* We use the legacy mode to get the semantics specified by
858
	 * the Render extension. */
859
	sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;
860
 
861
	switch (filter) {
862
	default:
863
	case SS_FILTER_NEAREST:
864
		sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
865
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
866
		break;
867
	case SS_FILTER_BILINEAR:
868
		sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
869
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
870
		break;
871
	}
872
 
873
	switch (extend) {
874
	default:
875
	case SS_EXTEND_NONE:
876
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
877
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
878
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
879
		break;
880
	case SS_EXTEND_REPEAT:
881
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
882
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
883
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
884
		break;
885
	case SS_EXTEND_PAD:
886
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
887
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
888
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
889
		break;
890
	case SS_EXTEND_REFLECT:
891
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
892
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
893
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
894
		break;
895
	}
896
 
897
	sampler_state->ss2.border_color_pointer =
898
	    intel_emit_reloc(sampler_state_bo, sampler_state_offset +
899
			     offsetof(struct brw_sampler_state, ss2),
900
			     border_color_bo, 0,
901
			     I915_GEM_DOMAIN_SAMPLER, 0) >> 5;
902
 
903
	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
904
}
905
 
906
static void
907
gen7_sampler_state_init(drm_intel_bo * sampler_state_bo,
908
		   struct gen7_sampler_state *sampler_state,
909
		   sampler_state_filter_t filter,
910
		   sampler_state_extend_t extend,
911
		   drm_intel_bo * border_color_bo)
912
{
913
	uint32_t sampler_state_offset;
914
 
915
	sampler_state_offset = (char *)sampler_state -
916
	    (char *)sampler_state_bo->virtual;
917
 
918
	/* PS kernel use this sampler */
919
	memset(sampler_state, 0, sizeof(*sampler_state));
920
 
921
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
922
 
923
	/* We use the legacy mode to get the semantics specified by
924
	 * the Render extension. */
925
	sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;
926
 
927
	switch (filter) {
928
	default:
929
	case SS_FILTER_NEAREST:
930
		sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
931
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
932
		break;
933
	case SS_FILTER_BILINEAR:
934
		sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
935
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
936
		break;
937
	}
938
 
939
	switch (extend) {
940
	default:
941
	case SS_EXTEND_NONE:
942
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
943
		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
944
		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
945
		break;
946
	case SS_EXTEND_REPEAT:
947
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
948
		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
949
		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
950
		break;
951
	case SS_EXTEND_PAD:
952
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
953
		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
954
		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
955
		break;
956
	case SS_EXTEND_REFLECT:
957
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
958
		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
959
		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
960
		break;
961
	}
962
 
963
	sampler_state->ss2.default_color_pointer =
964
	    intel_emit_reloc(sampler_state_bo, sampler_state_offset +
965
			     offsetof(struct gen7_sampler_state, ss2),
966
			     border_color_bo, 0,
967
			     I915_GEM_DOMAIN_SAMPLER, 0) >> 5;
968
 
969
	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
970
}
971
 
972
 
973
 
974
static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
975
					       sampler_state_filter_t src_filter,
976
					       sampler_state_extend_t src_extend,
977
					       sampler_state_filter_t mask_filter,
978
					       sampler_state_extend_t mask_extend,
979
					       drm_intel_bo * border_color_bo)
980
{
981
	drm_intel_bo *sampler_state_bo;
982
	struct brw_sampler_state *sampler_state;
983
	int ret;
984
 
985
	sampler_state_bo =
986
	    drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state",
987
			       sizeof(struct brw_sampler_state) * 2, 4096);
988
	assert(sampler_state_bo);
989
 
990
	ret = drm_intel_bo_map(sampler_state_bo, TRUE);
991
	assert(ret == 0);
992
 
993
	sampler_state = sampler_state_bo->virtual;
994
 
995
	gen4_sampler_state_init(sampler_state_bo,
996
				&sampler_state[0],
997
				src_filter, src_extend, border_color_bo);
998
	gen4_sampler_state_init(sampler_state_bo,
999
				&sampler_state[1],
1000
				mask_filter, mask_extend, border_color_bo);
1001
 
1002
	drm_intel_bo_unmap(sampler_state_bo);
1003
 
1004
	return sampler_state_bo;
1005
	(void)ret;
1006
}
1007
 
1008
static drm_intel_bo *
1009
gen7_create_sampler_state(intel_screen_private *intel,
1010
			  sampler_state_filter_t src_filter,
1011
			  sampler_state_extend_t src_extend,
1012
			  sampler_state_filter_t mask_filter,
1013
			  sampler_state_extend_t mask_extend,
1014
			  drm_intel_bo * border_color_bo)
1015
{
1016
	drm_intel_bo *sampler_state_bo;
1017
	struct gen7_sampler_state *sampler_state;
1018
	int ret;
1019
 
1020
	sampler_state_bo =
1021
	    drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state",
1022
			       sizeof(struct gen7_sampler_state) * 2, 4096);
1023
	assert(sampler_state_bo);
1024
 
1025
	ret = drm_intel_bo_map(sampler_state_bo, TRUE);
1026
	assert(ret == 0);
1027
 
1028
	sampler_state = sampler_state_bo->virtual;
1029
 
1030
	gen7_sampler_state_init(sampler_state_bo,
1031
				&sampler_state[0],
1032
				src_filter, src_extend, border_color_bo);
1033
	gen7_sampler_state_init(sampler_state_bo,
1034
				&sampler_state[1],
1035
				mask_filter, mask_extend, border_color_bo);
1036
 
1037
	drm_intel_bo_unmap(sampler_state_bo);
1038
 
1039
	return sampler_state_bo;
1040
	(void)ret;
1041
}
1042
 
1043
static inline drm_intel_bo *
1044
i965_create_sampler_state(intel_screen_private *intel,
1045
			  sampler_state_filter_t src_filter,
1046
			  sampler_state_extend_t src_extend,
1047
			  sampler_state_filter_t mask_filter,
1048
			  sampler_state_extend_t mask_extend,
1049
			  drm_intel_bo * border_color_bo)
1050
{
1051
	if (INTEL_INFO(intel)->gen < 070)
1052
		return gen4_create_sampler_state(intel, src_filter, src_extend,
1053
						 mask_filter, mask_extend,
1054
						 border_color_bo);
1055
	return gen7_create_sampler_state(intel, src_filter, src_extend,
1056
					 mask_filter, mask_extend,
1057
					 border_color_bo);
1058
}
1059
 
1060
 
1061
/* Program one brw_cc_unit_state entry, located at cc_state_offset inside
 * the (mapped) cc_state_bo, for the given source/destination blend
 * factors.  Stencil, depth, logic op, alpha test and dithering are all
 * disabled; only color blending with BLENDFUNCTION_ADD is enabled.
 * The CC viewport in cc_vp_bo is referenced via a relocation in cc4. */
static void
cc_state_init(drm_intel_bo * cc_state_bo,
	      uint32_t cc_state_offset,
	      int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo)
{
	struct brw_cc_unit_state *cc_state;

	/* cc_state_bo must already be mapped; point at our entry. */
	cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual +
						cc_state_offset);

	memset(cc_state, 0, sizeof(*cc_state));
	cc_state->cc0.stencil_enable = 0;	/* disable stencil */
	cc_state->cc2.depth_test = 0;	/* disable depth test */
	cc_state->cc2.logicop_enable = 0;	/* disable logic op */
	cc_state->cc3.ia_blend_enable = 0;	/* blend alpha same as colors */
	cc_state->cc3.blend_enable = 1;	/* enable color blend */
	cc_state->cc3.alpha_test = 0;	/* disable alpha test */

	/* cc4 holds the CC viewport pointer in 32-byte units. */
	cc_state->cc4.cc_viewport_state_offset =
	    intel_emit_reloc(cc_state_bo, cc_state_offset +
			     offsetof(struct brw_cc_unit_state, cc4),
			     cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;

	cc_state->cc5.dither_enable = 0;	/* disable dither */
	cc_state->cc5.logicop_func = 0xc;	/* COPY */
	cc_state->cc5.statistics_enable = 1;
	cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;

	/* Fill in alpha blend factors same as color, for the future. */
	cc_state->cc5.ia_src_blend_factor = src_blend;
	cc_state->cc5.ia_dest_blend_factor = dst_blend;

	cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
	cc_state->cc6.clamp_post_alpha_blend = 1;
	cc_state->cc6.clamp_pre_alpha_blend = 1;
	cc_state->cc6.clamp_range = 0;	/* clamp range [0,1] */

	cc_state->cc6.src_blend_factor = src_blend;
	cc_state->cc6.dest_blend_factor = dst_blend;
}
1101
 
1102
static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel,
1103
					  Bool has_mask,
1104
					  drm_intel_bo * kernel_bo,
1105
					  drm_intel_bo * sampler_bo)
1106
{
1107
	struct brw_wm_unit_state *state;
1108
	drm_intel_bo *wm_state_bo;
1109
	int ret;
1110
 
1111
	wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state",
1112
					 sizeof(*state), 4096);
1113
	assert(wm_state_bo);
1114
 
1115
	ret = drm_intel_bo_map(wm_state_bo, TRUE);
1116
	assert(ret == 0);
1117
 
1118
	state = memset(wm_state_bo->virtual, 0, sizeof(*state));
1119
	state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
1120
	state->thread0.kernel_start_pointer =
1121
	    intel_emit_reloc(wm_state_bo,
1122
			     offsetof(struct brw_wm_unit_state, thread0),
1123
			     kernel_bo, state->thread0.grf_reg_count << 1,
1124
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
1125
 
1126
	state->thread1.single_program_flow = 0;
1127
 
1128
	/* scratch space is not used in our kernel */
1129
	state->thread2.scratch_space_base_pointer = 0;
1130
	state->thread2.per_thread_scratch_space = 0;
1131
 
1132
	state->thread3.const_urb_entry_read_length = 0;
1133
	state->thread3.const_urb_entry_read_offset = 0;
1134
 
1135
	state->thread3.urb_entry_read_offset = 0;
1136
	/* wm kernel use urb from 3, see wm_program in compiler module */
1137
	state->thread3.dispatch_grf_start_reg = 3;	/* must match kernel */
1138
 
1139
	if (IS_GEN5(intel))
1140
		state->wm4.sampler_count = 0;	/* hardware requirement */
1141
	else
1142
		state->wm4.sampler_count = 1;	/* 1-4 samplers used */
1143
 
1144
	state->wm4.sampler_state_pointer =
1145
	    intel_emit_reloc(wm_state_bo,
1146
			     offsetof(struct brw_wm_unit_state, wm4),
1147
			     sampler_bo,
1148
			     state->wm4.sampler_count << 2,
1149
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
1150
	state->wm5.max_threads = PS_MAX_THREADS - 1;
1151
	state->wm5.transposed_urb_read = 0;
1152
	state->wm5.thread_dispatch_enable = 1;
1153
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
1154
	 * start point
1155
	 */
1156
	state->wm5.enable_16_pix = 1;
1157
	state->wm5.enable_8_pix = 0;
1158
	state->wm5.early_depth_test = 1;
1159
 
1160
	/* Each pair of attributes (src/mask coords) is two URB entries */
1161
	if (has_mask) {
1162
		state->thread1.binding_table_entry_count = 3;	/* 2 tex and fb */
1163
		state->thread3.urb_entry_read_length = 4;
1164
	} else {
1165
		state->thread1.binding_table_entry_count = 2;	/* 1 tex and fb */
1166
		state->thread3.urb_entry_read_length = 2;
1167
	}
1168
 
1169
	/* binding table entry count is only used for prefetching, and it has to
1170
	 * be set 0 for Ironlake
1171
	 */
1172
	if (IS_GEN5(intel))
1173
		state->thread1.binding_table_entry_count = 0;
1174
 
1175
	drm_intel_bo_unmap(wm_state_bo);
1176
 
1177
	return wm_state_bo;
1178
	(void)ret;
1179
}
1180
 
1181
/* Create a CC viewport with depth clamping effectively disabled.
 * Returns a new bo reference owned by the caller. */
static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel)
{
	drm_intel_bo *bo;
	struct brw_cc_viewport vp;
	int ret;

	/* Zero the whole struct first so no uninitialized stack bytes
	 * (e.g. padding) are uploaded to the GPU. */
	memset(&vp, 0, sizeof(vp));
	vp.min_depth = -1.e35;
	vp.max_depth = 1.e35;

	bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state",
				sizeof(vp), 4096);
	assert(bo);

	ret = drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp);
	assert(ret == 0);
	/* ret is only checked via the assert above; the cast keeps
	 * -Wunused-variable quiet in NDEBUG builds (previously this was
	 * unreachable dead code after the return). */
	(void)ret;

	return bo;
}
1200
 
1201
/* Build a VS unit state with the vertex shader disabled (passthrough).
 * Returns a new bo reference owned by the caller. */
static drm_intel_bo *gen4_create_vs_unit_state(intel_screen_private *intel)
{
	struct brw_vs_unit_state vs_state;

	memset(&vs_state, 0, sizeof(vs_state));

	/* Ironlake requires the URB entry count scaled down by four. */
	vs_state.thread4.nr_urb_entries = IS_GEN5(intel) ?
	    (URB_VS_ENTRIES >> 2) : URB_VS_ENTRIES;
	vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;

	vs_state.vs6.vs_enable = 0;		/* passthrough */
	vs_state.vs6.vert_cache_disable = 1;

	return intel_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state),
				       "gen4 render VS state");
}
1218
 
1219
/**
1220
 * Set up all combinations of cc state: each blendfactor for source and
1221
 * dest.
1222
 */
1223
/* Build the full table of CC unit states — one entry for every
 * (source, destination) blend-factor pair — sharing a single CC
 * viewport.  Returns a new bo reference owned by the caller. */
static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel)
{
	drm_intel_bo *cc_state_bo, *cc_vp_bo;
	int i, j, ret;

	cc_vp_bo = gen4_create_cc_viewport(intel);

	cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state",
					 sizeof(struct gen4_cc_unit_state),
					 4096);
	assert(cc_state_bo);

	ret = drm_intel_bo_map(cc_state_bo, TRUE);
	assert(ret == 0);
	/* ret is only checked via the assert above; the cast keeps
	 * -Wunused-variable quiet in NDEBUG builds (previously this was
	 * unreachable dead code after the return). */
	(void)ret;

	for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) {
			cc_state_init(cc_state_bo,
				      offsetof(struct gen4_cc_unit_state,
					       cc_state[i][j].state),
				      i, j, cc_vp_bo);
		}
	}
	drm_intel_bo_unmap(cc_state_bo);

	/* The CC states hold relocations to the viewport; drop our ref. */
	drm_intel_bo_unreference(cc_vp_bo);

	return cc_state_bo;
}
1253
 
1254
/* Map a picture's render format to the hardware surface format used
 * when sampling it as a texture.  The format must be present in
 * i965_tex_formats[]; an unknown format is a programming error. */
static uint32_t i965_get_card_format(PicturePtr picture)
{
	unsigned int i;

	for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
	     i++) {
		if (i965_tex_formats[i].fmt == picture->format)
			return i965_tex_formats[i].card_fmt;
	}

	/* Previously a failed lookup fell through to an out-of-bounds
	 * array read in NDEBUG builds; fail loudly in debug builds and
	 * return a defined value otherwise. */
	assert(0);
	return 0;
}
1267
 
1268
static sampler_state_filter_t sampler_state_filter_from_picture(int filter)
1269
{
1270
	switch (filter) {
1271
	case PictFilterNearest:
1272
		return SS_FILTER_NEAREST;
1273
	case PictFilterBilinear:
1274
		return SS_FILTER_BILINEAR;
1275
	default:
1276
		return SS_INVALID_FILTER;
1277
	}
1278
}
1279
 
1280
static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type)
1281
{
1282
	switch (repeat_type) {
1283
	case RepeatNone:
1284
		return SS_EXTEND_NONE;
1285
	case RepeatNormal:
1286
		return SS_EXTEND_REPEAT;
1287
	case RepeatPad:
1288
		return SS_EXTEND_PAD;
1289
	case RepeatReflect:
1290
		return SS_EXTEND_REFLECT;
1291
	default:
1292
		return SS_INVALID_EXTEND;
1293
	}
1294
}
1295
 
1296
/**
1297
 * Sets up the common fields for a surface state buffer for the given
1298
 * picture in the given surface state buffer.
1299
 */
1300
/* Emit a gen4 SURFACE_STATE for the picture into the software surface
 * buffer, mark the pixmap's GEM domains, and record a relocation for
 * the surface base address.  Returns the byte offset of the new entry
 * inside intel->surface_data. */
static int
gen4_set_picture_surface_state(intel_screen_private *intel,
			       PicturePtr picture, PixmapPtr pixmap,
			       Bool is_dst)
{
	struct intel_pixmap *priv = pixmap->private;
	struct brw_surface_state *ss;
	uint32_t write_domain, read_domains;
	int offset;

	/* Destinations are read+written by the render engine; sources are
	 * only read, through the sampler. */
	if (is_dst) {
		write_domain = I915_GEM_DOMAIN_RENDER;
		read_domains = I915_GEM_DOMAIN_RENDER;
	} else {
		write_domain = 0;
		read_domains = I915_GEM_DOMAIN_SAMPLER;
	}
	intel_batch_mark_pixmap_domains(intel, priv,
					read_domains, write_domain);
	/* Next free slot in the CPU-side surface state buffer. */
	ss = (struct brw_surface_state *)
		(intel->surface_data + intel->surface_used);

	memset(ss, 0, sizeof(*ss));
	ss->ss0.surface_type = BRW_SURFACE_2D;
	if (is_dst)
		ss->ss0.surface_format = i965_get_dest_format(picture);
	else
		ss->ss0.surface_format = i965_get_card_format(picture);

	ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
	ss->ss0.color_blend = 1;
	/* Patched by the relocation emitted below. */
	ss->ss1.base_addr = priv->bo->offset;

	/* Hardware uses size-minus-one encoding for dimensions/pitch. */
	ss->ss2.height = pixmap->drawable.height - 1;
	ss->ss2.width = pixmap->drawable.width - 1;
	ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
	ss->ss3.tile_walk = 0;	/* Tiled X */
	ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0;

	/* Relocation so ss1.base_addr is fixed up when surface_bo is
	 * bound (surface_data is flushed into surface_bo later). */
	dri_bo_emit_reloc(intel->surface_bo,
			  read_domains, write_domain,
			  0,
			  intel->surface_used +
			  offsetof(struct brw_surface_state, ss1),
			  priv->bo);

	offset = intel->surface_used;
	intel->surface_used += SURFACE_STATE_PADDED_SIZE;

	return offset;
}
1351
 
1352
/* Gen7 variant of gen4_set_picture_surface_state: emits a gen7
 * SURFACE_STATE (tiling moved to ss0, Haswell channel selects in ss7)
 * into the software surface buffer and records the base-address
 * relocation.  Returns the byte offset of the new entry inside
 * intel->surface_data. */
static int
gen7_set_picture_surface_state(intel_screen_private *intel,
			       PicturePtr picture, PixmapPtr pixmap,
			       Bool is_dst)
{
	struct intel_pixmap *priv = pixmap->private;
	struct gen7_surface_state *ss;
	uint32_t write_domain, read_domains;
	int offset;

	/* Destinations are read+written by the render engine; sources are
	 * only read, through the sampler. */
	if (is_dst) {
		write_domain = I915_GEM_DOMAIN_RENDER;
		read_domains = I915_GEM_DOMAIN_RENDER;
	} else {
		write_domain = 0;
		read_domains = I915_GEM_DOMAIN_SAMPLER;
	}
	intel_batch_mark_pixmap_domains(intel, priv,
					read_domains, write_domain);
	/* Next free slot in the CPU-side surface state buffer. */
	ss = (struct gen7_surface_state *)
		(intel->surface_data + intel->surface_used);

	memset(ss, 0, sizeof(*ss));
	ss->ss0.surface_type = BRW_SURFACE_2D;
	if (is_dst)
		ss->ss0.surface_format = i965_get_dest_format(picture);
	else
		ss->ss0.surface_format = i965_get_card_format(picture);

	ss->ss0.tile_walk = 0;	/* Tiled X */
	ss->ss0.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0;
	/* Patched by the relocation emitted below. */
	ss->ss1.base_addr = priv->bo->offset;

	/* Hardware uses size-minus-one encoding for dimensions/pitch. */
	ss->ss2.height = pixmap->drawable.height - 1;
	ss->ss2.width = pixmap->drawable.width - 1;
	ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;

	/* Haswell: program an identity shader channel select. */
	if (IS_HSW(intel)) {
		ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
		ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
		ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
		ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
	}

	/* Relocation so ss1.base_addr is fixed up when surface_bo is
	 * bound (surface_data is flushed into surface_bo later). */
	dri_bo_emit_reloc(intel->surface_bo,
			  read_domains, write_domain,
			  0,
			  intel->surface_used +
			  offsetof(struct gen7_surface_state, ss1),
			  priv->bo);

	offset = intel->surface_used;
	intel->surface_used += SURFACE_STATE_PADDED_SIZE;

	return offset;
}
1408
 
1409
static inline int
1410
i965_set_picture_surface_state(intel_screen_private *intel,
1411
			       PicturePtr picture, PixmapPtr pixmap,
1412
			       Bool is_dst)
1413
{
1414
    if (INTEL_INFO(intel)->gen < 070)
1415
        return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst);
1416
    return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst);
1417
}
1418
 
1419
/* Emit 3DSTATE_VERTEX_ELEMENTS for the current composite operation.
 * The layout depends on whether a mask is present (extra texcoord set)
 * and whether the source transform is affine (2 vs 3 floats per
 * texcoord).  The state is cached per (has_mask, is_affine) id so
 * repeated calls with the same configuration emit nothing. */
static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	Bool has_mask = intel->render_mask != NULL;
	Bool is_affine = composite_op->is_affine;
	/*
	 * number of extra parameters per vertex
	 */
	int nelem = has_mask ? 2 : 1;
	/*
	 * size of extra parameters:
	 *  3 for homogenous (xyzw)
	 *  2 for cartesian (xy)
	 */
	int selem = is_affine ? 2 : 3;
	uint32_t w_component;
	uint32_t src_format;
	int id;

	/* Encode the configuration into a small cache id. */
	id = has_mask << 1 | is_affine;

	if (composite_op->vertex_id == id)
		return;	/* already programmed for this configuration */

	composite_op->vertex_id = id;

	if (is_affine) {
		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_1_FLT;
	} else {
		src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_SRC;
	}

	if (IS_GEN5(intel)) {
		/*
		 * The reason to add this extra vertex element in the header is that
		 * Ironlake has different vertex header definition and origin method to
		 * set destination element offset doesn't exist anymore, which means
		 * hardware requires a predefined vertex element layout.
		 *
		 * haihao proposed this approach to fill the first vertex element, so
		 * origin layout for Gen4 doesn't need to change, and origin shader
		 * programs behavior is also kept.
		 *
		 * I think this is not bad. - zhenyu
		 */

		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
			  ((2 * (2 + nelem)) - 1));
		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
			  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
			  (0 << VE0_OFFSET_SHIFT));

		/* Padding header element: all components store zero. */
		OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
	} else {
		/* Set up our vertex elements, sourced from the single vertex buffer.
		 * that will be set up later.
		 */
		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
			  ((2 * (1 + nelem)) - 1));
	}

	/* x,y */
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT));

	if (IS_GEN5(intel))
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	else
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
			  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
	/* u0, v0, w0 */
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
		  (src_format << VE0_FORMAT_SHIFT) |
		  ((2 * 4) << VE0_OFFSET_SHIFT));	/* offset vb in bytes */

	if (IS_GEN5(intel))
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	else
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
			  ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));	/* VUE offset in dwords */
	/* u1, v1, w1 */
	if (has_mask) {
		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
			  (src_format << VE0_FORMAT_SHIFT) |
			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT));	/* vb offset in bytes */

		if (IS_GEN5(intel))
			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
		else
			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
				  ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));	/* VUE offset in dwords */
	}
}
1537
 
1538
/* Emit the full gen4/gen5 3D pipeline setup for the current composite
 * operation: pipeline invariants, state base addresses, binding table
 * and drawing rectangle, the pipelined unit-state pointers (VS/GS/CLIP/
 * SF/WM/CC), the URB fence layout, and finally the vertex elements.
 * Must be called with the composite_op fields already filled in. */
static void i965_emit_composite_state(struct intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	int op = composite_op->op;
	PicturePtr mask_picture = intel->render_mask_picture;
	PicturePtr dest_picture = intel->render_dest_picture;
	PixmapPtr mask = intel->render_mask;
	PixmapPtr dest = intel->render_dest;
	sampler_state_filter_t src_filter = composite_op->src_filter;
	sampler_state_filter_t mask_filter = composite_op->mask_filter;
	sampler_state_extend_t src_extend = composite_op->src_extend;
	sampler_state_extend_t mask_extend = composite_op->mask_extend;
	uint32_t src_blend, dst_blend;

	intel->needs_render_state_emit = FALSE;

	/* Begin the long sequence of commands needed to set up the 3D
	 * rendering pipe
	 */

	/* One-time (per batch lifetime) pipeline invariants. */
	if (intel->needs_3d_invariant) {
		if (IS_GEN5(intel)) {
			/* Ironlake errata workaround: Before disabling the clipper,
			 * you have to MI_FLUSH to get the pipeline idle.
			 */
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
		}

		/* Match Mesa driver setup */
		if (INTEL_INFO(intel)->gen >= 045)
			OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
		else
			OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);

		/* Set system instruction pointer */
		OUT_BATCH(BRW_STATE_SIP | 0);
		OUT_BATCH(0);

		intel->needs_3d_invariant = FALSE;
	}

	/* Emit STATE_BASE_ADDRESS once per batch; the surface base
	 * address dword is recorded (surface_reloc) so i965_surface_flush
	 * can attach the relocation to the final surface bo. */
	if (intel->surface_reloc == 0) {
		/* Zero out the two base address registers so all offsets are
		 * absolute.
		 */
		if (IS_GEN5(intel)) {
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Instruction base address */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* Instruction max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		} else {
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		}
	}

	/* Pick the blend factors for this Render op/format combination. */
	i965_get_blend_cntl(op, mask_picture, dest_picture->format,
			    &src_blend, &dst_blend);

	/* Binding table pointers */
	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);	/* vs */
	OUT_BATCH(0);	/* gs */
	OUT_BATCH(0);	/* clip */
	OUT_BATCH(0);	/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(intel->surface_table);

	/* The drawing rectangle clipping is always on.  Set it to values that
	 * shouldn't do any clipping.
	 */
	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	OUT_BATCH(0x00000000);	/* ymin, xmin */
	OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
		  DRAW_XMAX(dest->drawable.width - 1));	/* ymax, xmax */
	OUT_BATCH(0x00000000);	/* yorigin, xorigin */

	/* skip the depth buffer */
	/* skip the polygon stipple */
	/* skip the polygon stipple offset */
	/* skip the line stipple */

	/* Set the pointers to the 3d pipeline state */
	OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_RELOC(render_state->vs_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	OUT_BATCH(BRW_GS_DISABLE);	/* disable GS, resulting in passthrough */
	OUT_BATCH(BRW_CLIP_DISABLE);	/* disable CLIP, resulting in passthrough */
	/* The SF state differs depending on whether a mask is in use
	 * (different attribute interpolation setup). */
	if (mask) {
		OUT_RELOC(render_state->sf_mask_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	} else {
		OUT_RELOC(render_state->sf_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	}

	/* WM state selected by kernel and per-picture filter/extend. */
	OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel]
		  [src_filter][src_extend]
		  [mask_filter][mask_extend],
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

	/* CC state indexed by the (src, dst) blend-factor pair. */
	OUT_RELOC(render_state->cc_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  offsetof(struct gen4_cc_unit_state,
			   cc_state[src_blend][dst_blend]));

	{
		int urb_vs_start, urb_vs_size;
		int urb_gs_start, urb_gs_size;
		int urb_clip_start, urb_clip_size;
		int urb_sf_start, urb_sf_size;
		int urb_cs_start, urb_cs_size;

		/* Carve the URB into consecutive VS/GS/CLIP/SF/CS regions. */
		urb_vs_start = 0;
		urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
		urb_gs_start = urb_vs_start + urb_vs_size;
		urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
		urb_clip_start = urb_gs_start + urb_gs_size;
		urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
		urb_sf_start = urb_clip_start + urb_clip_size;
		urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
		urb_cs_start = urb_sf_start + urb_sf_size;
		urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

		/* Erratum (Vol 1a, p32):
		 *   URB_FENCE must not cross a cache-line (64 bytes).
		 */
		if ((intel->batch_used & 15) > (16 - 3)) {
			int cnt = 16 - (intel->batch_used & 15);
			while (cnt--)
				OUT_BATCH(MI_NOOP);
		}

		OUT_BATCH(BRW_URB_FENCE |
			  UF0_CS_REALLOC |
			  UF0_SF_REALLOC |
			  UF0_CLIP_REALLOC |
			  UF0_GS_REALLOC |
			  UF0_VS_REALLOC |
			  1);
		OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
			  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
			  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
		OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
			  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

		/* Constant buffer state */
		OUT_BATCH(BRW_CS_URB_STATE | 0);
		OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
			  (URB_CS_ENTRIES << 0));
	}

	gen4_composite_vertex_elements(intel);
}
1710
 
1711
/**
1712
 * Returns whether the current set of composite state plus vertex buffer is
1713
 * expected to fit in the aperture.
1714
 */
1715
static Bool i965_composite_check_aperture(intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	/* Every bo referenced by a gen4/gen5 composite: batch, vertices,
	 * surfaces, and the fixed-function unit states. */
	drm_intel_bo *bo_table[] = {
		intel->batch_bo,
		intel->vertex_bo,
		intel->surface_bo,
		render_state->vs_state_bo,
		render_state->sf_state_bo,
		render_state->sf_mask_state_bo,
		render_state->wm_state_bo[composite_op->wm_kernel]
		    [composite_op->src_filter]
		    [composite_op->src_extend]
		    [composite_op->mask_filter]
		    [composite_op->mask_extend],
		render_state->cc_state_bo,
	};
	/* Gen6+ references a different state set (kernels and sampler
	 * states are separate bos, plus blend/depth-stencil state). */
	drm_intel_bo *gen6_bo_table[] = {
		intel->batch_bo,
		intel->vertex_bo,
		intel->surface_bo,
		render_state->wm_kernel_bo[composite_op->wm_kernel],
		render_state->ps_sampler_state_bo[composite_op->src_filter]
		    [composite_op->src_extend]
		    [composite_op->mask_filter]
		    [composite_op->mask_extend],
		render_state->cc_vp_bo,
		render_state->cc_state_bo,
		render_state->gen6_blend_bo,
		render_state->gen6_depth_stencil_bo,
	};

	/* A zero return from the kernel check means everything fits. */
	if (INTEL_INFO(intel)->gen >= 060)
		return drm_intel_bufmgr_check_aperture_space(gen6_bo_table,
							ARRAY_SIZE(gen6_bo_table)) == 0;
	else
		return drm_intel_bufmgr_check_aperture_space(bo_table,
							ARRAY_SIZE(bo_table)) == 0;
}
1755
 
1756
static void i965_surface_flush(struct intel_screen_private *intel)
1757
{
1758
	int ret;
1759
 
1760
	ret = drm_intel_bo_subdata(intel->surface_bo,
1761
				   0, intel->surface_used,
1762
				   intel->surface_data);
1763
	assert(ret == 0);
1764
	intel->surface_used = 0;
1765
 
1766
	assert (intel->surface_reloc != 0);
1767
	drm_intel_bo_emit_reloc(intel->batch_bo,
1768
				intel->surface_reloc * 4,
1769
				intel->surface_bo, BASE_ADDRESS_MODIFY,
1770
				I915_GEM_DOMAIN_INSTRUCTION, 0);
1771
	intel->surface_reloc = 0;
1772
 
1773
	drm_intel_bo_unreference(intel->surface_bo);
1774
	intel->surface_bo =
1775
		drm_intel_bo_alloc(intel->bufmgr, "surface data",
1776
				   sizeof(intel->surface_data), 4096);
1777
	assert(intel->surface_bo);
1778
 
1779
	return;
1780
	(void)ret;
1781
}
1782
 
1783
static void
1784
i965_emit_composite_primitive_identity_source(intel_screen_private *intel,
1785
					      int srcX, int srcY,
1786
					      int maskX, int maskY,
1787
					      int dstX, int dstY,
1788
					      int w, int h)
1789
{
1790
	OUT_VERTEX(dstX + w);
1791
	OUT_VERTEX(dstY + h);
1792
	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
1793
	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1794
 
1795
	OUT_VERTEX(dstX);
1796
	OUT_VERTEX(dstY + h);
1797
	OUT_VERTEX(srcX * intel->scale_units[0][0]);
1798
	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1799
 
1800
	OUT_VERTEX(dstX);
1801
	OUT_VERTEX(dstY);
1802
	OUT_VERTEX(srcX * intel->scale_units[0][0]);
1803
	OUT_VERTEX(srcY * intel->scale_units[0][1]);
1804
}
1805
 
1806
static void
1807
i965_emit_composite_primitive_affine_source(intel_screen_private *intel,
1808
					    int srcX, int srcY,
1809
					    int maskX, int maskY,
1810
					    int dstX, int dstY,
1811
					    int w, int h)
1812
{
1813
	float src_x[3], src_y[3];
1814
 
1815
	if (!intel_get_transformed_coordinates(srcX, srcY,
1816
					      intel->transform[0],
1817
					      &src_x[0],
1818
					      &src_y[0]))
1819
		return;
1820
 
1821
	if (!intel_get_transformed_coordinates(srcX, srcY + h,
1822
					      intel->transform[0],
1823
					      &src_x[1],
1824
					      &src_y[1]))
1825
		return;
1826
 
1827
	if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
1828
					      intel->transform[0],
1829
					      &src_x[2],
1830
					      &src_y[2]))
1831
		return;
1832
 
1833
	OUT_VERTEX(dstX + w);
1834
	OUT_VERTEX(dstY + h);
1835
	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
1836
	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
1837
 
1838
	OUT_VERTEX(dstX);
1839
	OUT_VERTEX(dstY + h);
1840
	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
1841
	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
1842
 
1843
	OUT_VERTEX(dstX);
1844
	OUT_VERTEX(dstY);
1845
	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
1846
	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
1847
}
1848
 
1849
static void
1850
i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
1851
						   int srcX, int srcY,
1852
						   int maskX, int maskY,
1853
						   int dstX, int dstY,
1854
						   int w, int h)
1855
{
1856
	OUT_VERTEX(dstX + w);
1857
	OUT_VERTEX(dstY + h);
1858
	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
1859
	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1860
	OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
1861
	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
1862
 
1863
	OUT_VERTEX(dstX);
1864
	OUT_VERTEX(dstY + h);
1865
	OUT_VERTEX(srcX * intel->scale_units[0][0]);
1866
	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1867
	OUT_VERTEX(maskX * intel->scale_units[1][0]);
1868
	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
1869
 
1870
	OUT_VERTEX(dstX);
1871
	OUT_VERTEX(dstY);
1872
	OUT_VERTEX(srcX * intel->scale_units[0][0]);
1873
	OUT_VERTEX(srcY * intel->scale_units[0][1]);
1874
	OUT_VERTEX(maskX * intel->scale_units[1][0]);
1875
	OUT_VERTEX(maskY * intel->scale_units[1][1]);
1876
}
1877
 
1878
/*
 * Generic (slow-path) vertex emission for one composite rectangle.
 *
 * Handles every remaining combination: affine or projective transforms,
 * with or without a mask.  Each of the three RECTLIST vertices carries
 * the destination position (x, y), the transformed+normalized source
 * coordinate (u, v[, w]), and — when render_mask is set — the same for
 * the mask.  The w component is only emitted for projective (non-affine)
 * transforms.  Corner index convention: [0] top-left, [1] bottom-left,
 * [2] bottom-right; emission order is 2, 1, 0.  If any coordinate
 * transform fails, the rectangle is silently dropped.
 */
static void
i965_emit_composite_primitive(intel_screen_private *intel,
			      int srcX, int srcY,
			      int maskX, int maskY,
			      int dstX, int dstY,
			      int w, int h)
{
	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
	Bool is_affine = intel->gen4_render_state->composite_op.is_affine;

	/* Transform the three source corners (2D for affine, homogeneous
	 * 3D for projective). */
	if (is_affine) {
		if (!intel_get_transformed_coordinates(srcX, srcY,
						       intel->transform[0],
						       &src_x[0],
						       &src_y[0]))
			return;

		if (!intel_get_transformed_coordinates(srcX, srcY + h,
						       intel->transform[0],
						       &src_x[1],
						       &src_y[1]))
			return;

		if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
						       intel->transform[0],
						       &src_x[2],
						       &src_y[2]))
			return;
	} else {
		if (!intel_get_transformed_coordinates_3d(srcX, srcY,
							  intel->transform[0],
							  &src_x[0],
							  &src_y[0],
							  &src_w[0]))
			return;

		if (!intel_get_transformed_coordinates_3d(srcX, srcY + h,
							  intel->transform[0],
							  &src_x[1],
							  &src_y[1],
							  &src_w[1]))
			return;

		if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h,
							  intel->transform[0],
							  &src_x[2],
							  &src_y[2],
							  &src_w[2]))
			return;
	}

	/* Same for the mask corners, when a mask is in use. */
	if (intel->render_mask) {
		if (is_affine) {
			if (!intel_get_transformed_coordinates(maskX, maskY,
							      intel->transform[1],
							      &mask_x[0],
							      &mask_y[0]))
				return;

			if (!intel_get_transformed_coordinates(maskX, maskY + h,
							      intel->transform[1],
							      &mask_x[1],
							      &mask_y[1]))
				return;

			if (!intel_get_transformed_coordinates(maskX + w, maskY + h,
							      intel->transform[1],
							      &mask_x[2],
							      &mask_y[2]))
				return;
		} else {
			if (!intel_get_transformed_coordinates_3d(maskX, maskY,
								 intel->transform[1],
								 &mask_x[0],
								 &mask_y[0],
								 &mask_w[0]))
				return;

			if (!intel_get_transformed_coordinates_3d(maskX, maskY + h,
								 intel->transform[1],
								 &mask_x[1],
								 &mask_y[1],
								 &mask_w[1]))
				return;

			if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h,
								 intel->transform[1],
								 &mask_x[2],
								 &mask_y[2],
								 &mask_w[2]))
				return;
		}
	}

	/* bottom-right corner */
	OUT_VERTEX(dstX + w);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[2]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[2]);
	}

	/* bottom-left corner */
	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[1]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[1] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[1]);
	}

	/* top-left corner */
	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY);
	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[0]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[0]);
	}
}
2011
 
2012
/*
 * Prepare the gen4+ render pipeline for a composite operation.
 *
 * Validates and latches the sampler filter/repeat modes, records the
 * source/mask/dest pictures and transforms, picks the WM (pixel shader)
 * kernel and the fast-path vertex emitter for this combination, and
 * verifies the operation fits in the GPU aperture.
 *
 * Returns TRUE on success; FALSE to request a software fallback when a
 * filter, repeat or component-alpha blend combination cannot be
 * expressed, or when the operation cannot fit in the aperture.
 */
Bool
i965_prepare_composite(int op, PicturePtr source_picture,
		       PicturePtr mask_picture, PicturePtr dest_picture,
		       PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
{
	intel_screen_private *intel = intel_get_screen_private();
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;

	/* Map Render filter/repeat to sampler-state enums; reject what the
	 * hardware sampler cannot do. */
	composite_op->src_filter =
	    sampler_state_filter_from_picture(source_picture->filter);
	if (composite_op->src_filter == SS_INVALID_FILTER) {
		intel_debug_fallback("Bad src filter 0x%x\n",
				     source_picture->filter);
		return FALSE;
	}
	composite_op->src_extend =
	    sampler_state_extend_from_picture(source_picture->repeatType);
	if (composite_op->src_extend == SS_INVALID_EXTEND) {
		intel_debug_fallback("Bad src repeat 0x%x\n",
				     source_picture->repeatType);
		return FALSE;
	}

	if (mask_picture) {
		if (mask_picture->componentAlpha &&
		    PICT_FORMAT_RGB(mask_picture->format)) {
			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (i965_blend_op[op].src_alpha &&
			    (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) {
				intel_debug_fallback("Component alpha not supported "
						     "with source alpha and source "
						     "value blending.\n");
				return FALSE;
			}
		}

		composite_op->mask_filter =
		    sampler_state_filter_from_picture(mask_picture->filter);
		if (composite_op->mask_filter == SS_INVALID_FILTER) {
			intel_debug_fallback("Bad mask filter 0x%x\n",
					     mask_picture->filter);
			return FALSE;
		}
		composite_op->mask_extend =
		    sampler_state_extend_from_picture(mask_picture->repeatType);
		if (composite_op->mask_extend == SS_INVALID_EXTEND) {
			intel_debug_fallback("Bad mask repeat 0x%x\n",
					     mask_picture->repeatType);
			return FALSE;
		}
	} else {
		/* No mask: harmless defaults so the sampler-state lookup
		 * below still indexes a valid entry. */
		composite_op->mask_filter = SS_FILTER_NEAREST;
		composite_op->mask_extend = SS_EXTEND_NONE;
	}

	/* Flush any pending writes prior to relocating the textures. */
	if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask))
		intel_batch_emit_flush();

	/* Latch the operands for i965_bind_surfaces()/prim_emit. */
	composite_op->op = op;
	intel->render_source_picture = source_picture;
	intel->render_mask_picture = mask_picture;
	intel->render_dest_picture = dest_picture;
	intel->render_source = source;
	intel->render_mask = mask;
	intel->render_dest = dest;

	/* Reciprocal texture sizes used to normalize texture coordinates. */
	intel->scale_units[0][0] = 1. / source->drawable.width;
	intel->scale_units[0][1] = 1. / source->drawable.height;

	intel->transform[0] = source_picture->transform;
	composite_op->is_affine = intel_transform_is_affine(intel->transform[0]);

	if (mask_picture == NULL) {
		intel->transform[1] = NULL;
		intel->scale_units[1][0] = -1;
		intel->scale_units[1][1] = -1;
	} else {
		assert(mask != NULL);
		intel->transform[1] = mask_picture->transform;
		intel->scale_units[1][0] = 1. / mask->drawable.width;
		intel->scale_units[1][1] = 1. / mask->drawable.height;
		/* Projective if EITHER transform is projective. */
		composite_op->is_affine &=
		    intel_transform_is_affine(intel->transform[1]);
	}

	/* Select the WM kernel matching mask presence, component-alpha
	 * mode and affine vs projective coordinates. */
	if (mask) {
		if (mask_picture->componentAlpha &&
		    PICT_FORMAT_RGB(mask_picture->format)) {
			if (i965_blend_op[op].src_alpha) {
				if (composite_op->is_affine)
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_SRCALPHA_AFFINE;
				else
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE;
			} else {
				if (composite_op->is_affine)
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_AFFINE;
				else
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_PROJECTIVE;
			}
		} else {
			if (composite_op->is_affine)
				composite_op->wm_kernel =
				    WM_KERNEL_MASKNOCA_AFFINE;
			else
				composite_op->wm_kernel =
				    WM_KERNEL_MASKNOCA_PROJECTIVE;
		}
	} else {
		if (composite_op->is_affine)
			composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE;
		else
			composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE;
	}

	/* Pick the fastest vertex emitter the operands allow; the generic
	 * one handles everything else. */
	intel->prim_emit = i965_emit_composite_primitive;
	if (!mask) {
		if (intel->transform[0] == NULL)
			intel->prim_emit = i965_emit_composite_primitive_identity_source;
		else if (composite_op->is_affine)
			intel->prim_emit = i965_emit_composite_primitive_affine_source;
	} else {
		if (intel->transform[0] == NULL && intel->transform[1] == NULL)
			intel->prim_emit = i965_emit_composite_primitive_identity_source_mask;
	}

	/* Vertex layout: (x, y) + per-texture (u, v) or (u, v, w). */
	intel->floats_per_vertex =
		2 + (mask ? 2 : 1) * (composite_op->is_affine ? 2: 3);

	/* If the bo set doesn't fit, submit the batch and retry once. */
	if (!i965_composite_check_aperture(intel)) {
		intel_batch_submit();
		if (!i965_composite_check_aperture(intel)) {
			intel_debug_fallback("Couldn't fit render operation "
					     "in aperture\n");
			return FALSE;
		}
	}

	/* Ensure room for the binding table + 3 surface states. */
	if (sizeof(intel->surface_data) - intel->surface_used <
	    4 * SURFACE_STATE_PADDED_SIZE)
		i965_surface_flush(intel);

	intel->needs_render_state_emit = TRUE;

	return TRUE;
}
2166
 
2167
/*
 * Emit 3DSTATE_VERTEX_BUFFERS pointing at the shared vertex bo with the
 * pitch for the current vertex layout (composite_op.vertex_id).  A bit
 * per layout in intel->vertex_id caches what has already been emitted
 * this batch, so repeat calls for the same layout are no-ops.
 */
static void i965_select_vertex_buffer(struct intel_screen_private *intel)
{
	int id = intel->gen4_render_state->composite_op.vertex_id;
	int modifyenable = 0;

	/* Already emitted for this layout in the current batch. */
	if (intel->vertex_id & (1 << id))
		return;

	if (INTEL_INFO(intel)->gen >= 070)
		modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE;

	/* Set up the pointer to our (single) vertex buffer */
	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);

	/* XXX could use multiple vbo to reduce relocations if
	 * frequently switching between vertex sizes, like rgb10text.
	 */
	if (INTEL_INFO(intel)->gen >= 060) {
		OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
			  GEN6_VB0_VERTEXDATA |
			  modifyenable |
			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	} else {
		OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) |
			  VB0_VERTEXDATA |
			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	}
	OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
	/* Gen5+ takes an end-address dword; earlier gens ignore it. */
	if (INTEL_INFO(intel)->gen >= 050)
		OUT_RELOC(intel->vertex_bo,
			  I915_GEM_DOMAIN_VERTEX, 0,
			  sizeof(intel->vertex_ptr) - 1);
	else
		OUT_BATCH(0);
	OUT_BATCH(0);		// instance step rate: ignored for VERTEXDATA, but still there

	intel->vertex_id |= 1 << id;
}
2205
 
2206
/*
 * Build the binding table for the current composite operation in the
 * surface-state staging buffer: slot 0 = destination (writable),
 * slot 1 = source, slot 2 = mask (only when present).  The table offset
 * is recorded in intel->surface_table for the state-emission packets.
 */
static void i965_bind_surfaces(struct intel_screen_private *intel)
{
	uint32_t *binding_table;

	/* i965_prepare_composite() guaranteed room for table + 3 states. */
	assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data));

	binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
	intel->surface_table = intel->surface_used;
	intel->surface_used += SURFACE_STATE_PADDED_SIZE;

	binding_table[0] =
		i965_set_picture_surface_state(intel,
					       intel->render_dest_picture,
					       intel->render_dest,
					       TRUE);
	binding_table[1] =
		i965_set_picture_surface_state(intel,
					       intel->render_source_picture,
					       intel->render_source,
					       FALSE);
	if (intel->render_mask) {
		binding_table[2] =
			i965_set_picture_surface_state(intel,
						       intel->render_mask_picture,
						       intel->render_mask,
						       FALSE);
	}
}
2234
 
2235
/*
 * Queue one composite rectangle.
 *
 * Re-emits the render state if something invalidated it, makes sure the
 * vertex buffer has room and is selected, opens a 3DPRIMITIVE RECTLIST
 * if none is pending, then calls the prim_emit hook chosen by
 * i965_prepare_composite() to append this rectangle's three vertices.
 */
void
i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
	       int dstX, int dstY, int w, int h)
{
	intel_screen_private *intel = intel_get_screen_private();

	intel_batch_start_atomic(200);
	if (intel->needs_render_state_emit) {
		i965_bind_surfaces(intel);

		if (INTEL_INFO(intel)->gen >= 060)
			gen6_emit_composite_state(intel);
		else
			i965_emit_composite_state(intel);
	}

	/* On a vertex-size change, round the write position up to the next
	 * whole vertex of the new size. */
	if (intel->floats_per_vertex != intel->last_floats_per_vertex) {
		intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex;
		intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
		intel->last_floats_per_vertex = intel->floats_per_vertex;
	}
	/* Need room for 3 vertices (4 bytes per float) for this rect. */
	if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
		i965_vertex_flush(intel);
		intel_next_vertex(intel);
		intel->vertex_index = 0;
	}
	i965_select_vertex_buffer(intel);

	/* No primitive open: emit a RECTLIST header whose vertex count
	 * (first dword after the header) is patched in at flush time. */
	if (intel->vertex_offset == 0) {
		if (INTEL_INFO(intel)->gen >= 070) {
			OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2));
			OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
				  _3DPRIM_RECTLIST);
		} else {
			OUT_BATCH(BRW_3DPRIMITIVE |
				  BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
				  (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
				  (0 << 9) |
				  4);
		}
		intel->vertex_offset = intel->batch_used;
		OUT_BATCH(0);	/* vertex count, to be filled in later */
		OUT_BATCH(intel->vertex_index);
		OUT_BATCH(1);	/* single instance */
		OUT_BATCH(0);	/* start instance location */
		OUT_BATCH(0);	/* index buffer offset, ignored */
		intel->vertex_count = intel->vertex_index;
	}

	intel->prim_emit(intel,
			 srcX, srcY,
			 maskX, maskY,
			 dstX, dstY,
			 w, h);
	intel->vertex_index += 3;

	/* Pre-gen5 needs a flush after every rectangle. */
	if (INTEL_INFO(intel)->gen < 050) {
	    /* XXX OMG! */
	    i965_vertex_flush(intel);
	    OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
	}

	intel_batch_end_atomic();
}
2299
 
2300
/*
 * Batch-submission hook: a new batch loses all hardware state emitted
 * into the previous one, so invalidate every cached-state field and
 * force full state re-emission on the next composite call.
 */
void i965_batch_commit_notify(intel_screen_private *intel)
{
	intel->needs_render_state_emit = TRUE;
	intel->needs_3d_invariant = TRUE;
	intel->vertex_index = 0;
	intel->last_floats_per_vertex = 0;

	/* No vertex layout has been programmed in the new batch. */
	intel->gen4_render_state->composite_op.vertex_id = -1;

	/* Drop the cached gen6+ pipeline state as well. */
	intel->gen6_render_state.num_sf_outputs = 0;
	intel->gen6_render_state.samplers = NULL;
	intel->gen6_render_state.kernel = NULL;
	intel->gen6_render_state.blend = -1;
	intel->gen6_render_state.drawrect = -1;

	/* Any pending surface reloc must have been resolved before submit. */
	assert(intel->surface_reloc == 0);
}
2317
 
2318
/**
2319
 * Called at EnterVT so we can set up our offsets into the state buffer.
2320
 */
2321
void gen4_render_state_init()
{
    ENTER();

	intel_screen_private *intel = intel_get_screen_private();
	struct gen4_render_state *render;
	const struct wm_kernel_info *wm_kernels;
	sampler_state_filter_t src_filter;
	sampler_state_extend_t src_extend;
	sampler_state_filter_t mask_filter;
	sampler_state_extend_t mask_extend;
	drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo;
	drm_intel_bo *border_color_bo;
	int m;

	intel->needs_3d_invariant = TRUE;

	/* Staging bo for surface state / binding tables. */
	intel->surface_bo =
		drm_intel_bo_alloc(intel->bufmgr, "surface data",
				   sizeof(intel->surface_data), 4096);
	assert(intel->surface_bo);

	intel->surface_used = 0;

	if (intel->gen4_render_state == NULL) {
		intel->gen4_render_state = calloc(1, sizeof(*render));
		assert(intel->gen4_render_state != NULL);
	}

	/* Gen6+ uses its own state setup; everything below is gen4/gen5.
	 * (Returning a void call from a void function is intentional.) */
	if (INTEL_INFO(intel)->gen >= 060)
		return gen6_render_state_init();

	render = intel->gen4_render_state;
	render->composite_op.vertex_id = -1;

	render->vs_state_bo = gen4_create_vs_unit_state(intel);

	/* Set up the two SF states (one for blending with a mask, one without) */
	if (IS_GEN5(intel)) {
		sf_kernel_bo = intel_bo_alloc_for_data(intel,
						       sf_kernel_static_gen5,
						       sizeof
						       (sf_kernel_static_gen5),
						       "sf kernel gen5");
		sf_kernel_mask_bo =
		    intel_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5,
					    sizeof(sf_kernel_mask_static_gen5),
					    "sf mask kernel");
	} else {
		sf_kernel_bo = intel_bo_alloc_for_data(intel,
						       sf_kernel_static,
						       sizeof(sf_kernel_static),
						       "sf kernel");
		sf_kernel_mask_bo = intel_bo_alloc_for_data(intel,
							    sf_kernel_mask_static,
							    sizeof
							    (sf_kernel_mask_static),
							    "sf mask kernel");
	}
	render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo);
	render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo);
	drm_intel_bo_unreference(sf_kernel_bo);
	drm_intel_bo_unreference(sf_kernel_mask_bo);

	/* Upload every WM (pixel shader) kernel for this generation. */
	wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4;
	for (m = 0; m < KERNEL_COUNT; m++) {
		render->wm_kernel_bo[m] =
			intel_bo_alloc_for_data(intel,
					wm_kernels[m].data,
					wm_kernels[m].size,
					"WM kernel");
	}

	/* Set up the WM states: each filter/extend type for source and mask, per
	 * kernel.
	 */
	border_color_bo = sampler_border_color_create(intel);
	for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) {
		for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) {
			for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) {
				for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) {
					drm_intel_bo *sampler_state_bo;

					sampler_state_bo =
					    i965_create_sampler_state(intel,
								      src_filter, src_extend,
								      mask_filter, mask_extend,
								      border_color_bo);

					for (m = 0; m < KERNEL_COUNT; m++) {
						render->wm_state_bo[m][src_filter][src_extend][mask_filter][mask_extend] =
							gen4_create_wm_state
							(intel,
							 wm_kernels[m]. has_mask,
							 render->wm_kernel_bo[m],
							 sampler_state_bo);
					}
					drm_intel_bo_unreference(sampler_state_bo);
				}
			}
		}
	}
	drm_intel_bo_unreference(border_color_bo);

	render->cc_state_bo = gen4_create_cc_unit_state(intel);

    LEAVE();
}
2429
 
2430
/**
2431
 * Called at LeaveVT.
2432
 */
2433
void gen4_render_state_cleanup(ScrnInfoPtr scrn)
{
	/* NOTE(review): elsewhere in this file intel_get_screen_private()
	 * is called with no arguments — confirm the scrn-taking form here
	 * still matches the KolibriOS port's declaration. */
	intel_screen_private *intel = intel_get_screen_private(scrn);
	struct gen4_render_state *render_state = intel->gen4_render_state;
	int i, j, k, l, m;

	drm_intel_bo_unreference(intel->surface_bo);
	drm_intel_bo_unreference(render_state->vs_state_bo);
	drm_intel_bo_unreference(render_state->sf_state_bo);
	drm_intel_bo_unreference(render_state->sf_mask_state_bo);

	for (i = 0; i < KERNEL_COUNT; i++)
		drm_intel_bo_unreference(render_state->wm_kernel_bo[i]);

	/* Gen4/5 per-(kernel, src filter/extend, mask filter/extend) WM
	 * unit states. */
	for (i = 0; i < FILTER_COUNT; i++)
		for (j = 0; j < EXTEND_COUNT; j++)
			for (k = 0; k < FILTER_COUNT; k++)
				for (l = 0; l < EXTEND_COUNT; l++)
					for (m = 0; m < KERNEL_COUNT; m++)
						drm_intel_bo_unreference
						    (render_state->
						     wm_state_bo[m][i][j][k]
						     [l]);

	/* Gen6+ sampler states (NULL entries on gen4/5 are harmless). */
	for (i = 0; i < FILTER_COUNT; i++)
		for (j = 0; j < EXTEND_COUNT; j++)
			for (k = 0; k < FILTER_COUNT; k++)
				for (l = 0; l < EXTEND_COUNT; l++)
					drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]);

	drm_intel_bo_unreference(render_state->cc_state_bo);

	/* Gen6+-only state objects. */
	drm_intel_bo_unreference(render_state->cc_vp_bo);
	drm_intel_bo_unreference(render_state->gen6_blend_bo);
	drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo);

	free(intel->gen4_render_state);
	intel->gen4_render_state = NULL;
}
2472
 
2473
/*
2474
 * for GEN6+
2475
 */
2476
#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)
2477
 
2478
static drm_intel_bo *
2479
gen6_composite_create_cc_state(intel_screen_private *intel)
2480
{
2481
	struct gen6_color_calc_state *state;
2482
	drm_intel_bo *cc_bo;
2483
	int ret;
2484
 
2485
	cc_bo = drm_intel_bo_alloc(intel->bufmgr,
2486
				"gen6 CC state",
2487
				sizeof(*state),
2488
				4096);
2489
	assert(cc_bo);
2490
 
2491
	ret = drm_intel_bo_map(cc_bo, TRUE);
2492
	assert(ret == 0);
2493
 
2494
	state = memset(cc_bo->virtual, 0, sizeof(*state));
2495
	state->constant_r = 1.0;
2496
	state->constant_g = 0.0;
2497
	state->constant_b = 1.0;
2498
	state->constant_a = 1.0;
2499
	drm_intel_bo_unmap(cc_bo);
2500
 
2501
	return cc_bo;
2502
	(void)ret;
2503
}
2504
 
2505
static drm_intel_bo *
2506
gen6_composite_create_blend_state(intel_screen_private *intel)
2507
{
2508
	drm_intel_bo *blend_bo;
2509
	int src, dst, ret;
2510
 
2511
	blend_bo = drm_intel_bo_alloc(intel->bufmgr,
2512
				"gen6 BLEND state",
2513
				BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
2514
				4096);
2515
	assert(blend_bo);
2516
 
2517
	ret = drm_intel_bo_map(blend_bo, TRUE);
2518
	assert(ret == 0);
2519
 
2520
	memset(blend_bo->virtual, 0, blend_bo->size);
2521
	for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) {
2522
		for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) {
2523
			uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE;
2524
			struct gen6_blend_state *blend;
2525
 
2526
			blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset);
2527
			blend->blend0.dest_blend_factor = dst;
2528
			blend->blend0.source_blend_factor = src;
2529
			blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
2530
			blend->blend0.blend_enable = 1;
2531
 
2532
			blend->blend1.post_blend_clamp_enable = 1;
2533
			blend->blend1.pre_blend_clamp_enable = 1;
2534
		}
2535
	}
2536
 
2537
	drm_intel_bo_unmap(blend_bo);
2538
	return blend_bo;
2539
	(void)ret;
2540
}
2541
 
2542
static drm_intel_bo *
2543
gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
2544
{
2545
	drm_intel_bo *depth_stencil_bo;
2546
	int ret;
2547
 
2548
	depth_stencil_bo =
2549
		drm_intel_bo_alloc(intel->bufmgr,
2550
				   "gen6 DEPTH_STENCIL state",
2551
				   sizeof(struct gen6_depth_stencil_state),
2552
				   4096);
2553
	assert(depth_stencil_bo);
2554
 
2555
	ret = drm_intel_bo_map(depth_stencil_bo, TRUE);
2556
	assert(ret == 0);
2557
 
2558
	memset(depth_stencil_bo->virtual, 0,
2559
	       sizeof(struct gen6_depth_stencil_state));
2560
	drm_intel_bo_unmap(depth_stencil_bo);
2561
 
2562
	return depth_stencil_bo;
2563
	(void)ret;
2564
}
2565
 
2566
/*
 * Emit STATE_BASE_ADDRESS for gen6+.  The surface-state base address
 * dword is recorded in intel->surface_reloc so i965_surface_flush() can
 * point it at the surface bo once its contents are uploaded.
 */
static void
gen6_composite_state_base_address(intel_screen_private *intel)
{
	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
	/* Surface state base address: remember where this dword lives so
	 * the reloc can be emitted later, once the bo is finalized. */
	intel->surface_reloc = intel->batch_used;
	intel_batch_emit_dword(intel,
			       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2582
 
2583
/*
 * Point the hardware at the BLEND_STATE entry for the current operation
 * (blend_offset is a byte offset into gen6_blend_bo).  The CC and
 * depth/stencil pointers only need (re)uploading once per batch — when
 * the cached blend offset is still -1.  No-op if the offset is unchanged.
 */
static void
gen6_composite_cc_state_pointers(intel_screen_private *intel,
				 uint32_t blend_offset)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	drm_intel_bo *cc_bo = NULL;
	drm_intel_bo *depth_stencil_bo = NULL;

	if (intel->gen6_render_state.blend == blend_offset)
		return;

	/* First emission this batch: also upload CC + depth/stencil. */
	if (intel->gen6_render_state.blend == -1) {
		cc_bo = render_state->cc_state_bo;
		depth_stencil_bo = render_state->gen6_depth_stencil_bo;
	}
	if (INTEL_INFO(intel)->gen >= 070) {
		gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
	} else {
		gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
	}

	intel->gen6_render_state.blend = blend_offset;
}
2606
 
2607
/*
 * Upload the sampler-state pointer for gen6/gen7, skipping the emission
 * when the cached sampler bo is unchanged since the last call.
 */
static void
gen6_composite_sampler_state_pointers(intel_screen_private *intel,
				      drm_intel_bo *bo)
{
	if (intel->gen6_render_state.samplers == bo)
		return;

	intel->gen6_render_state.samplers = bo;

	if (INTEL_INFO(intel)->gen >= 070)
		gen7_upload_sampler_state_pointers(intel, bo);
	else
		gen6_upload_sampler_state_pointers(intel, bo);
}
2621
 
2622
static void
2623
gen6_composite_wm_constants(intel_screen_private *intel)
2624
{
2625
	Bool ivb = INTEL_INFO(intel)->gen >= 070;
2626
	/* disable WM constant buffer */
2627
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2));
2628
	OUT_BATCH(0);
2629
	OUT_BATCH(0);
2630
	OUT_BATCH(0);
2631
	OUT_BATCH(0);
2632
	if (ivb) {
2633
		OUT_BATCH(0);
2634
		OUT_BATCH(0);
2635
	}
2636
}
2637
 
2638
/*
 * Configure the SF stage for one or two texture-coordinate outputs
 * (two when a mask is in use).  No-op when the cached output count is
 * already correct.
 */
static void
gen6_composite_sf_state(intel_screen_private *intel,
			Bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs)
		return;

	intel->gen6_render_state.num_sf_outputs = num_sf_outputs;

	if (INTEL_INFO(intel)->gen >= 070)
		gen7_upload_sf_state(intel, num_sf_outputs, 1);
	else
		gen6_upload_sf_state(intel, num_sf_outputs, 1);
}
2654
 
2655
/*
 * Emit gen6 3DSTATE_WM pointing at the given pixel-shader kernel bo,
 * sized for 2 surfaces (dest + source) or 3 when a mask is present.
 * No-op when the cached kernel bo is unchanged.
 */
static void
gen6_composite_wm_state(intel_screen_private *intel,
			Bool has_mask,
			drm_intel_bo *bo)
{
	int num_surfaces = has_mask ? 3 : 2;
	int num_sf_outputs = has_mask ? 2 : 1;

	if (intel->gen6_render_state.kernel == bo)
		return;

	intel->gen6_render_state.kernel = bo;

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	OUT_RELOC(bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		0);
	OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
		  (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
	OUT_BATCH(0);
	OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
	OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE |
		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
2684
 
2685
/*
 * Gen7 split of the WM setup: a minimal 3DSTATE_WM followed by
 * 3DSTATE_PS carrying the kernel pointer and thread configuration.
 * Haswell uses a different max-threads shift and needs the sample mask
 * programmed.  No-op when the cached kernel bo is unchanged.
 */
static void
gen7_composite_wm_state(intel_screen_private *intel,
			Bool has_mask,
			drm_intel_bo *bo)
{
	int num_surfaces = has_mask ? 3 : 2;
	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
	unsigned int num_samples = 0;

	if (IS_HSW(intel)) {
		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
		num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
	}

	if (intel->gen6_render_state.kernel == bo)
		return;

	intel->gen6_render_state.kernel = bo;

	OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2));
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
	OUT_BATCH(0); /* scratch space base offset */
	OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples |
		  GEN7_PS_ATTRIBUTE_ENABLE |
		  GEN7_PS_16_DISPATCH_ENABLE);
	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
	OUT_BATCH(0); /* kernel 1 pointer */
	OUT_BATCH(0); /* kernel 2 pointer */
}
2721
 
2722
 
2723
static void
2724
gen6_composite_drawing_rectangle(intel_screen_private *intel,
2725
				 PixmapPtr dest)
2726
{
2727
	uint32_t dw =
2728
		DRAW_YMAX(dest->drawable.height - 1) |
2729
		DRAW_XMAX(dest->drawable.width - 1);
2730
 
2731
	/* XXX cacomposite depends upon the implicit non-pipelined flush */
2732
	if (0 && intel->gen6_render_state.drawrect == dw)
2733
		return;
2734
	intel->gen6_render_state.drawrect = dw;
2735
 
2736
	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
2737
	OUT_BATCH(0x00000000);	/* ymin, xmin */
2738
	OUT_BATCH(dw);	/* ymax, xmax */
2739
	OUT_BATCH(0x00000000);	/* yorigin, xorigin */
2740
}
2741
 
2742
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS describing the layout of the composite
 * vertex buffer for the (has_mask, is_affine) combination.  The chosen
 * layout is cached in composite_op->vertex_id so identical back-to-back
 * operations skip the re-emit.
 */
static void
gen6_composite_vertex_element_state(intel_screen_private *intel,
				    Bool has_mask,
				    Bool is_affine)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is TRUE): same as above
	 */
	gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op;
	int nelem = has_mask ? 2 : 1;	/* number of texture-coordinate elements */
	int selem = is_affine ? 2 : 3;	/* floats per texture coordinate */
	uint32_t w_component;
	uint32_t src_format;
	int id;

	/* Pack the two booleans into a small unique layout id. */
	id = has_mask << 1 | is_affine;

	if (composite_op->vertex_id == id)
		return;

	composite_op->vertex_id = id;

	/* Affine coordinates carry only (u, v); synthesize w = 1.0.
	 * Projective coordinates carry (u, v, w) from the buffer. */
	if (is_affine) {
		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_1_FLT;
	} else {
		src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_SRC;
	}

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
		((2 * (2 + nelem)) + 1 - 2));

	/* pad element: all four components forced to 0.0 */
	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT));
	OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

	/* x,y */
	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

	/* u0, v0, w0 */
	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
		  (src_format << VE0_FORMAT_SHIFT) |
		  ((2 * 4) << VE0_OFFSET_SHIFT));	/* offset vb in bytes */
	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
		  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

	/* u1, v1, w1 */
	if (has_mask) {
		OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
			  GEN6_VE0_VALID |
			  (src_format << VE0_FORMAT_SHIFT) |
			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	}
}
2824
 
2825
static void
2826
gen6_emit_composite_state(struct intel_screen_private *intel)
2827
{
2828
	struct gen4_render_state *render = intel->gen4_render_state;
2829
	gen4_composite_op *composite_op = &render->composite_op;
2830
	sampler_state_filter_t src_filter = composite_op->src_filter;
2831
	sampler_state_filter_t mask_filter = composite_op->mask_filter;
2832
	sampler_state_extend_t src_extend = composite_op->src_extend;
2833
	sampler_state_extend_t mask_extend = composite_op->mask_extend;
2834
	Bool is_affine = composite_op->is_affine;
2835
	Bool has_mask = intel->render_mask != NULL;
2836
	Bool ivb = INTEL_INFO(intel)->gen >= 070;
2837
	uint32_t src, dst;
2838
	drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend];
2839
 
2840
	intel->needs_render_state_emit = FALSE;
2841
	if (intel->needs_3d_invariant) {
2842
		gen6_upload_invariant_states(intel);
2843
 
2844
		if (ivb) {
2845
			gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo);
2846
			gen7_upload_urb(intel);
2847
			gen7_upload_bypass_states(intel);
2848
			gen7_upload_depth_buffer_state(intel);
2849
		} else {
2850
			gen6_upload_invariant_states(intel);
2851
			gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo);
2852
			gen6_upload_urb(intel);
2853
 
2854
			gen6_upload_gs_state(intel);
2855
			gen6_upload_depth_buffer_state(intel);
2856
		}
2857
		gen6_composite_wm_constants(intel);
2858
		gen6_upload_vs_state(intel);
2859
		gen6_upload_clip_state(intel);
2860
 
2861
		intel->needs_3d_invariant = FALSE;
2862
	}
2863
 
2864
	i965_get_blend_cntl(composite_op->op,
2865
			    intel->render_mask_picture,
2866
			    intel->render_dest_picture->format,
2867
			    &src, &dst);
2868
 
2869
	if (intel->surface_reloc == 0)
2870
		gen6_composite_state_base_address(intel);
2871
 
2872
	gen6_composite_cc_state_pointers(intel,
2873
					(src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE);
2874
	gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo);
2875
	gen6_composite_sf_state(intel, has_mask);
2876
	if (ivb) {
2877
		gen7_composite_wm_state(intel, has_mask,
2878
					render->wm_kernel_bo[composite_op->wm_kernel]);
2879
		gen7_upload_binding_table(intel, intel->surface_table);
2880
	} else {
2881
		gen6_composite_wm_state(intel, has_mask,
2882
					render->wm_kernel_bo[composite_op->wm_kernel]);
2883
		gen6_upload_binding_table(intel, intel->surface_table);
2884
	}
2885
	gen6_composite_drawing_rectangle(intel, intel->render_dest);
2886
	gen6_composite_vertex_element_state(intel, has_mask, is_affine);
2887
}
2888
 
2889
static void
2890
gen6_render_state_init()
2891
{
2892
    ENTER();
2893
 
2894
	intel_screen_private *intel = intel_get_screen_private();
2895
	struct gen4_render_state *render;
2896
	sampler_state_filter_t src_filter;
2897
	sampler_state_filter_t mask_filter;
2898
	sampler_state_extend_t src_extend;
2899
	sampler_state_extend_t mask_extend;
2900
	int m;
2901
	drm_intel_bo *border_color_bo;
2902
	const struct wm_kernel_info *wm_kernels;
2903
 
2904
	render= intel->gen4_render_state;
2905
	render->composite_op.vertex_id = -1;
2906
 
2907
	intel->gen6_render_state.num_sf_outputs = 0;
2908
	intel->gen6_render_state.samplers = NULL;
2909
	intel->gen6_render_state.blend = -1;
2910
	intel->gen6_render_state.kernel = NULL;
2911
	intel->gen6_render_state.drawrect = -1;
2912
 
2913
	wm_kernels = IS_GEN7(intel) ? wm_kernels_gen7 : wm_kernels_gen6;
2914
	for (m = 0; m < KERNEL_COUNT; m++) {
2915
		render->wm_kernel_bo[m] =
2916
			intel_bo_alloc_for_data(intel,
2917
					wm_kernels[m].data,
2918
					wm_kernels[m].size,
2919
					"WM kernel gen6/7");
2920
	}
2921
 
2922
	border_color_bo = sampler_border_color_create(intel);
2923
 
2924
	for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) {
2925
		for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) {
2926
			for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) {
2927
				for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) {
2928
					render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend] =
2929
						i965_create_sampler_state(intel,
2930
									  src_filter, src_extend,
2931
									  mask_filter, mask_extend,
2932
								border_color_bo);
2933
				}
2934
			}
2935
		}
2936
	}
2937
 
2938
	drm_intel_bo_unreference(border_color_bo);
2939
	render->cc_vp_bo = gen4_create_cc_viewport(intel);
2940
	render->cc_state_bo = gen6_composite_create_cc_state(intel);
2941
	render->gen6_blend_bo = gen6_composite_create_blend_state(intel);
2942
	render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel);
2943
 
2944
    LEAVE();
2945
}
2946
 
2947
void i965_vertex_flush(struct intel_screen_private *intel)
2948
{
2949
	if (intel->vertex_offset) {
2950
		intel->batch_ptr[intel->vertex_offset] =
2951
			intel->vertex_index - intel->vertex_count;
2952
		intel->vertex_offset = 0;
2953
	}
2954
}
2955
 
2956
void i965_batch_flush(struct intel_screen_private *intel)
2957
{
2958
	if (intel->surface_used)
2959
		i965_surface_flush(intel);
2960
}
2961
 
2962
 
2963
/* Per-pixmap private key used by UXA to attach driver state to pixmaps.
 * Newer X servers provide DevPrivateKeyRec; older ones use a plain
 * integer index. */
#if HAS_DEVPRIVATEKEYREC
DevPrivateKeyRec uxa_pixmap_index;
#else
int uxa_pixmap_index;
#endif
2968
 
2969
/* Convert a 16.16 fixed-point value to float: integer part plus the
 * fractional part scaled by 1/65536. */
#define xFixedToFloat(val) \
	((float)xFixedToInt(val) + ((float)xFixedFrac(val) / 65536.0))
2971
 
2972
static Bool
2973
_intel_transform_point(PictTransformPtr transform,
2974
		       float x, float y, float result[3])
2975
{
2976
	int j;
2977
 
2978
	for (j = 0; j < 3; j++) {
2979
		result[j] = (xFixedToFloat(transform->matrix[j][0]) * x +
2980
			     xFixedToFloat(transform->matrix[j][1]) * y +
2981
			     xFixedToFloat(transform->matrix[j][2]));
2982
	}
2983
	if (!result[2])
2984
		return FALSE;
2985
	return TRUE;
2986
}
2987
 
2988
/**
2989
 * Returns the floating-point coordinates transformed by the given transform.
2990
 *
2991
 * transform may be null.
2992
 */
2993
Bool
2994
intel_get_transformed_coordinates(int x, int y, PictTransformPtr transform,
2995
				  float *x_out, float *y_out)
2996
{
2997
	if (transform == NULL) {
2998
		*x_out = x;
2999
		*y_out = y;
3000
	} else {
3001
		float result[3];
3002
 
3003
		if (!_intel_transform_point(transform,
3004
					    x, y,
3005
					    result))
3006
			return FALSE;
3007
		*x_out = result[0] / result[2];
3008
		*y_out = result[1] / result[2];
3009
	}
3010
	return TRUE;
3011
}
3012
 
3013
/**
3014
 * Returns the un-normalized floating-point coordinates transformed by the given transform.
3015
 *
3016
 * transform may be null.
3017
 */
3018
Bool
3019
intel_get_transformed_coordinates_3d(int x, int y, PictTransformPtr transform,
3020
				     float *x_out, float *y_out, float *w_out)
3021
{
3022
	if (transform == NULL) {
3023
		*x_out = x;
3024
		*y_out = y;
3025
		*w_out = 1;
3026
	} else {
3027
		float result[3];
3028
 
3029
		if (!_intel_transform_point(transform,
3030
					    x, y,
3031
					    result))
3032
			return FALSE;
3033
		*x_out = result[0];
3034
		*y_out = result[1];
3035
		*w_out = result[2];
3036
	}
3037
	return TRUE;
3038
}
3039
 
3040
/**
3041
 * Returns whether the provided transform is affine.
3042
 *
3043
 * transform may be null.
3044
 */
3045
Bool intel_transform_is_affine(PictTransformPtr t)
3046
{
3047
	if (t == NULL)
3048
		return TRUE;
3049
	return t->matrix[2][0] == 0 && t->matrix[2][1] == 0;
3050
}