Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2351 Serge 1
/*
2
 * Copyright © 2006,2008,2011 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 *
24
 * Authors:
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
27
 *    Carl Worth 
28
 *    Keith Packard 
29
 *    Chris Wilson 
30
 *
31
 */
32
 
33
#include 
34
#include 
35
#include "i915_drm.h"
36
#include "i915_drv.h"
37
#include "intel_drv.h"
38
 
39
#include 
40
#include 
41
#include 
42
#include 
43
 
44
#include 
45
 
46
#include "../bitmap.h"
47
 
48
#include "sna.h"
49
//#include "sna_reg.h"
50
#include "sna_render.h"
51
//#include "sna_render_inline.h"
52
//#include "sna_video.h"
53
 
54
#include "gen6_render.h"
55
 
56
 
57
/*
 * Per-operation debug switches, all currently 0.
 * NOTE(review): presumably a non-zero value disables the named render
 * path; no user of these switches is visible in this part of the file.
 */
#define NO_COMPOSITE		0
#define NO_COMPOSITE_SPANS	0
#define NO_COPY			0
#define NO_COPY_BOXES		0
#define NO_FILL			0
#define NO_FILL_BOXES		0
#define NO_CLEAR		0

/* NOTE(review): presumably pins rendering to one ring; confirm at use site. */
#define NO_RING_SWITCH		1

/* Upper bound on a surface dimension, as tested by too_large(). */
#define GEN6_MAX_SIZE		8192
68
 
69
/*
 * WM kernel used for unmasked sources; registered in wm_kernels as NOMASK.
 * The program is the concatenation of three pre-assembled fragments:
 * affine source coordinates, ARGB sampling, and the final write.
 * Each row holds four 32-bit words.
 */
static const uint32_t ps_kernel_nomask_affine[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};
74
 
75
/*
 * WM kernel used for unmasked sources; registered in wm_kernels as
 * NOMASK_PROJECTIVE.  Same layout as the affine variant, but the first
 * fragment computes projective source coordinates.
 */
static const uint32_t ps_kernel_nomask_projective[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};
80
 
81
 
82
#define KERNEL(kernel_enum, kernel, masked) \
83
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), masked}
84
static const struct wm_kernel_info {
85
	const char *name;
86
	const void *data;
87
	unsigned int size;
88
	Bool has_mask;
89
} wm_kernels[] = {
90
	KERNEL(NOMASK, ps_kernel_nomask_affine, FALSE),
91
	KERNEL(NOMASK_PROJECTIVE, ps_kernel_nomask_projective, FALSE),
92
 
93
};
94
#undef KERNEL
95
 
96
static const struct blendinfo {
97
	Bool src_alpha;
98
	uint32_t src_blend;
99
	uint32_t dst_blend;
100
} gen6_blend_op[] = {
101
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
102
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
103
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
104
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
105
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
106
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
107
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
108
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
109
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
110
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
111
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
112
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
113
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
114
};
115
 
116
 
117
/**
 * Number of blend-factor values covered by the blend-state table: one
 * more than GEN6_BLENDFACTOR_INV_DST_ALPHA, the highest-valued
 * BLENDFACTOR used in gen6_blend_op.
 *
 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN6_BLENDFACTOR_COUNT \
	(GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)
125
 
126
/* FIXME: surface format defined in gen6_defines.h, shared Sampling engine
127
 * 1.7.2
128
 
129
static const struct formatinfo {
130
	CARD32 pict_fmt;
131
	uint32_t card_fmt;
132
} gen6_tex_formats[] = {
133
	{PICT_a8, GEN6_SURFACEFORMAT_A8_UNORM},
134
	{PICT_a8r8g8b8, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM},
135
	{PICT_x8r8g8b8, GEN6_SURFACEFORMAT_B8G8R8X8_UNORM},
136
	{PICT_a8b8g8r8, GEN6_SURFACEFORMAT_R8G8B8A8_UNORM},
137
	{PICT_x8b8g8r8, GEN6_SURFACEFORMAT_R8G8B8X8_UNORM},
138
	{PICT_r8g8b8, GEN6_SURFACEFORMAT_R8G8B8_UNORM},
139
	{PICT_r5g6b5, GEN6_SURFACEFORMAT_B5G6R5_UNORM},
140
	{PICT_a1r5g5b5, GEN6_SURFACEFORMAT_B5G5R5A1_UNORM},
141
	{PICT_a2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10A2_UNORM},
142
	{PICT_x2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10X2_UNORM},
143
	{PICT_a2b10g10r10, GEN6_SURFACEFORMAT_R10G10B10A2_UNORM},
144
	{PICT_x2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10X2_UNORM},
145
	{PICT_a4r4g4b4, GEN6_SURFACEFORMAT_B4G4R4A4_UNORM},
146
};
147
 */
148
 
149
/* Size of one struct gen6_blend_state after ALIGN(..., 64). */
#define GEN6_BLEND_STATE_PADDED_SIZE \
	ALIGN(sizeof(struct gen6_blend_state), 64)

/*
 * Byte offset of the blend state for source factor s and destination
 * factor d, in a table laid out as [s][d] with padded entries.
 */
#define BLEND_OFFSET(s, d) \
	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

/*
 * Byte offset of a sampler-state pair, indexed by source filter/extend
 * (sf, se) and mask filter/extend (mf, me).  Each combination occupies
 * two struct gen6_sampler_state entries.
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * 2 * sizeof(struct gen6_sampler_state))

/* Emission shorthands; all of them expect a local variable named sna. */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
160
 
161
static inline bool too_large(int width, int height)
162
{
163
	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
164
}
165
 
166
static uint32_t gen6_get_blend(int op,
167
			       bool has_component_alpha,
168
			       uint32_t dst_format)
169
{
170
	uint32_t src, dst;
171
 
172
    src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
173
    dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
174
 
175
#if 0
176
	/* If there's no dst alpha channel, adjust the blend op so that
177
	 * we'll treat it always as 1.
178
	 */
179
	if (PICT_FORMAT_A(dst_format) == 0) {
180
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
181
			src = GEN6_BLENDFACTOR_ONE;
182
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
183
			src = GEN6_BLENDFACTOR_ZERO;
184
	}
185
 
186
	/* If the source alpha is being used, then we should only be in a
187
	 * case where the source blend factor is 0, and the source blend
188
	 * value is the mask channels multiplied by the source picture's alpha.
189
	 */
190
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
191
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
192
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
193
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
194
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
195
	}
196
 
197
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
198
	     op, dst_format, PICT_FORMAT_A(dst_format),
199
	     src, dst, (int)BLEND_OFFSET(src, dst)));
200
#endif
201
 
202
	return BLEND_OFFSET(src, dst);
203
}
204
 
205
/*
 * Return the surface format used for the render target.
 *
 * The per-format mapping is disabled in this port; every request yields
 * GEN6_SURFACEFORMAT_B8G8R8A8_UNORM and the format argument is not read.
 */
static uint32_t gen6_get_dest_format(CARD32 format)
{
	const uint32_t target_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

	return target_format;

/* Disabled mapping, kept for reference:
	switch (format) {
	default:
		assert(0);
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
 */
}
235
 
236
#if 0
237
/* Return TRUE when format is one of the listed destination formats. */
static Bool gen6_check_dst_format(PictFormat format)
{
	Bool supported = FALSE;

	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
	case PICT_r5g6b5:
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
	case PICT_a8:
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		supported = TRUE;
		break;
	default:
		break;
	}

	return supported;
}
256
 
257
static bool gen6_check_format(uint32_t format)
258
{
259
	switch (format) {
260
	case PICT_a8r8g8b8:
261
	case PICT_x8r8g8b8:
262
	case PICT_a8b8g8r8:
263
	case PICT_x8b8g8r8:
264
	case PICT_a2r10g10b10:
265
	case PICT_x2r10g10b10:
266
	case PICT_r8g8b8:
267
	case PICT_r5g6b5:
268
	case PICT_a1r5g5b5:
269
	case PICT_a8:
270
	case PICT_a4r4g4b4:
271
	case PICT_x4r4g4b4:
272
		return true;
273
	default:
274
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, format));
275
		return false;
276
	}
277
}
278
 
279
static uint32_t gen6_filter(uint32_t filter)
280
{
281
	switch (filter) {
282
	default:
283
		assert(0);
284
	case PictFilterNearest:
285
		return SAMPLER_FILTER_NEAREST;
286
	case PictFilterBilinear:
287
		return SAMPLER_FILTER_BILINEAR;
288
	}
289
}
290
 
291
/* Return TRUE when the picture's filter is nearest or bilinear, else FALSE. */
static uint32_t gen6_check_filter(PicturePtr picture)
{
	if (picture->filter == PictFilterNearest)
		return TRUE;
	if (picture->filter == PictFilterBilinear)
		return TRUE;

	return FALSE;
}
301
 
302
static uint32_t gen6_repeat(uint32_t repeat)
303
{
304
	switch (repeat) {
305
	default:
306
		assert(0);
307
	case RepeatNone:
308
		return SAMPLER_EXTEND_NONE;
309
	case RepeatNormal:
310
		return SAMPLER_EXTEND_REPEAT;
311
	case RepeatPad:
312
		return SAMPLER_EXTEND_PAD;
313
	case RepeatReflect:
314
		return SAMPLER_EXTEND_REFLECT;
315
	}
316
}
317
 
318
/*
 * Return TRUE when the picture does not repeat, or repeats with one of
 * the four known repeat types; FALSE for any other repeat type.
 */
static bool gen6_check_repeat(PicturePtr picture)
{
	if (picture->repeat) {
		switch (picture->repeatType) {
		case RepeatNone:
		case RepeatNormal:
		case RepeatPad:
		case RepeatReflect:
			break;
		default:
			return FALSE;
		}
	}

	return TRUE;
}
333
#endif
334
 
335
static int
336
gen6_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine)
337
{
338
	int base;
339
 
340
	if (has_mask) {
341
/*
342
		if (is_ca) {
343
			if (gen6_blend_op[op].src_alpha)
344
				base = GEN6_WM_KERNEL_MASKCA_SRCALPHA;
345
			else
346
				base = GEN6_WM_KERNEL_MASKCA;
347
		} else
348
			base = GEN6_WM_KERNEL_MASK;
349
*/
350
	} else
351
		base = GEN6_WM_KERNEL_NOMASK;
352
 
353
	return base + !is_affine;
354
}
355
 
356
static void
357
gen6_emit_urb(struct sna *sna)
358
{
359
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
360
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
361
		  (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
362
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
363
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
364
}
365
 
366
static void
367
gen6_emit_state_base_address(struct sna *sna)
368
{
369
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
370
	OUT_BATCH(0); /* general */
371
 
372
//   OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
373
//                sna->kgem.nbatch,
374
//                NULL,
375
//                I915_GEM_DOMAIN_INSTRUCTION << 16,
376
//                 BASE_ADDRESS_MODIFY));
377
 
378
    OUT_BATCH((sna->kgem.batch_obj->gtt_offset+
379
              sna->kgem.batch_idx*4096)|BASE_ADDRESS_MODIFY);
380
 
381
//   OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
382
//                sna->kgem.nbatch,
383
//                sna->render_state.gen6.general_bo,
384
//                I915_GEM_DOMAIN_INSTRUCTION << 16,
385
//                BASE_ADDRESS_MODIFY));
386
 
387
    OUT_BATCH(sna->render_state.gen6.general_bo->gaddr|BASE_ADDRESS_MODIFY);
388
 
389
	OUT_BATCH(0); /* indirect */
390
//   OUT_BATCH(kgem_add_reloc(&sna->kgem,
391
//                sna->kgem.nbatch,
392
//                sna->render_state.gen6.general_bo,
393
//                I915_GEM_DOMAIN_INSTRUCTION << 16,
394
//                BASE_ADDRESS_MODIFY));
395
 
396
    OUT_BATCH(sna->render_state.gen6.general_bo->gaddr|BASE_ADDRESS_MODIFY);
397
 
398
	/* upper bounds, disable */
399
	OUT_BATCH(0);
400
	OUT_BATCH(BASE_ADDRESS_MODIFY);
401
	OUT_BATCH(0);
402
	OUT_BATCH(BASE_ADDRESS_MODIFY);
403
}
404
 
405
static void
406
gen6_emit_viewports(struct sna *sna)
407
{
408
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
409
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
410
		  (4 - 2));
411
	OUT_BATCH(0);
412
	OUT_BATCH(0);
413
	OUT_BATCH(sna->render_state.gen6.cc_vp);
414
}
415
 
416
static void
417
gen6_emit_vs(struct sna *sna)
418
{
419
	/* disable VS constant buffer */
420
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
421
	OUT_BATCH(0);
422
	OUT_BATCH(0);
423
	OUT_BATCH(0);
424
	OUT_BATCH(0);
425
 
426
	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
427
	OUT_BATCH(0); /* no VS kernel */
428
	OUT_BATCH(0);
429
	OUT_BATCH(0);
430
	OUT_BATCH(0);
431
	OUT_BATCH(0); /* pass-through */
432
}
433
 
434
static void
435
gen6_emit_gs(struct sna *sna)
436
{
437
	/* disable GS constant buffer */
438
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
439
	OUT_BATCH(0);
440
	OUT_BATCH(0);
441
	OUT_BATCH(0);
442
	OUT_BATCH(0);
443
 
444
	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
445
	OUT_BATCH(0); /* no GS kernel */
446
	OUT_BATCH(0);
447
	OUT_BATCH(0);
448
	OUT_BATCH(0);
449
	OUT_BATCH(0);
450
	OUT_BATCH(0); /* pass-through */
451
}
452
 
453
static void
454
gen6_emit_clip(struct sna *sna)
455
{
456
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
457
	OUT_BATCH(0);
458
	OUT_BATCH(0); /* pass-through */
459
	OUT_BATCH(0);
460
}
461
 
462
static void
463
gen6_emit_wm_constants(struct sna *sna)
464
{
465
	/* disable WM constant buffer */
466
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
467
	OUT_BATCH(0);
468
	OUT_BATCH(0);
469
	OUT_BATCH(0);
470
	OUT_BATCH(0);
471
}
472
 
473
static void
474
gen6_emit_null_depth_buffer(struct sna *sna)
475
{
476
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
477
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
478
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
479
	OUT_BATCH(0);
480
	OUT_BATCH(0);
481
	OUT_BATCH(0);
482
	OUT_BATCH(0);
483
	OUT_BATCH(0);
484
 
485
	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
486
	OUT_BATCH(0);
487
}
488
 
489
static void
490
gen6_emit_invariant(struct sna *sna)
491
{
492
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
493
 
494
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
495
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
496
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
497
	OUT_BATCH(0);
498
 
499
	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
500
	OUT_BATCH(1);
501
 
502
	gen6_emit_urb(sna);
503
 
504
	gen6_emit_state_base_address(sna);
505
 
506
	gen6_emit_viewports(sna);
507
	gen6_emit_vs(sna);
508
	gen6_emit_gs(sna);
509
	gen6_emit_clip(sna);
510
	gen6_emit_wm_constants(sna);
511
	gen6_emit_null_depth_buffer(sna);
512
 
513
	sna->render_state.gen6.needs_invariant = FALSE;
514
}
515
 
516
static bool
517
gen6_emit_cc(struct sna *sna,
518
	     int op, bool has_component_alpha, uint32_t dst_format)
519
{
520
	struct gen6_render_state *render = &sna->render_state.gen6;
521
	uint32_t blend;
522
 
523
	blend = gen6_get_blend(op, has_component_alpha, dst_format);
524
 
525
	DBG(("%s(op=%d, ca=%d, format=%x): new=%x, current=%x\n",
526
	     __FUNCTION__,
527
	     op, has_component_alpha, dst_format,
528
	     blend, render->blend));
529
	if (render->blend == blend)
530
		return op <= PictOpSrc;
531
 
532
	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
533
	OUT_BATCH((render->cc_blend + blend) | 1);
534
	if (render->blend == (unsigned)-1) {
535
		OUT_BATCH(1);
536
		OUT_BATCH(1);
537
	} else {
538
		OUT_BATCH(0);
539
		OUT_BATCH(0);
540
	}
541
 
542
	render->blend = blend;
543
	return op <= PictOpSrc;
544
}
545
 
546
static void
547
gen6_emit_sampler(struct sna *sna, uint32_t state)
548
{
549
	assert(state <
550
	       2 * sizeof(struct gen6_sampler_state) *
551
	       FILTER_COUNT * EXTEND_COUNT *
552
	       FILTER_COUNT * EXTEND_COUNT);
553
 
554
	if (sna->render_state.gen6.samplers == state)
555
		return;
556
 
557
	sna->render_state.gen6.samplers = state;
558
 
559
	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
560
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
561
		  (4 - 2));
562
	OUT_BATCH(0); /* VS */
563
	OUT_BATCH(0); /* GS */
564
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
565
}
566
 
567
static void
568
gen6_emit_sf(struct sna *sna, Bool has_mask)
569
{
570
	int num_sf_outputs = has_mask ? 2 : 1;
571
 
572
	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
573
		return;
574
 
575
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
576
	     __FUNCTION__, num_sf_outputs, 1, 0));
577
 
578
	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;
579
 
580
	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
581
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
582
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
583
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
584
	OUT_BATCH(0);
585
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
586
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
587
	OUT_BATCH(0);
588
	OUT_BATCH(0);
589
	OUT_BATCH(0);
590
	OUT_BATCH(0);
591
	OUT_BATCH(0); /* DW9 */
592
	OUT_BATCH(0);
593
	OUT_BATCH(0);
594
	OUT_BATCH(0);
595
	OUT_BATCH(0);
596
	OUT_BATCH(0); /* DW14 */
597
	OUT_BATCH(0);
598
	OUT_BATCH(0);
599
	OUT_BATCH(0);
600
	OUT_BATCH(0);
601
	OUT_BATCH(0); /* DW19 */
602
}
603
 
604
static void
605
gen6_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_inputs)
606
{
607
	if (sna->render_state.gen6.kernel == kernel)
608
		return;
609
 
610
	sna->render_state.gen6.kernel = kernel;
611
 
612
	DBG(("%s: switching to %s\n", __FUNCTION__, wm_kernels[kernel].name));
613
 
614
	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
615
	OUT_BATCH(sna->render_state.gen6.wm_kernel[kernel]);
616
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
617
		  nr_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
618
	OUT_BATCH(0);
619
	OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */
620
	OUT_BATCH((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
621
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE |
622
		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
623
	OUT_BATCH(nr_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
624
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
625
	OUT_BATCH(0);
626
	OUT_BATCH(0);
627
}
628
 
629
static bool
630
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
631
{
632
	if (sna->render_state.gen6.surface_table == offset)
633
		return false;
634
 
635
	/* Binding table pointers */
636
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
637
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
638
		  (4 - 2));
639
	OUT_BATCH(0);		/* vs */
640
	OUT_BATCH(0);		/* gs */
641
	/* Only the PS uses the binding table */
642
	OUT_BATCH(offset*4);
643
 
644
	sna->render_state.gen6.surface_table = offset;
645
	return true;
646
}
647
 
648
static bool
649
gen6_emit_drawing_rectangle(struct sna *sna,
650
			    const struct sna_composite_op *op)
651
{
652
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
653
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
654
 
655
	assert(!too_large(op->dst.x, op->dst.y));
656
	assert(!too_large(op->dst.width, op->dst.height));
657
 
658
	if (sna->render_state.gen6.drawrect_limit  == limit &&
659
	    sna->render_state.gen6.drawrect_offset == offset)
660
		return false;
661
 
662
	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
663
	 * produced by non-pipelined state commands), software needs to first
664
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
665
	 * 0.
666
	 *
667
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
668
	 * BEFORE the pipe-control with a post-sync op and no write-cache
669
	 * flushes.
670
	 */
671
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
672
	OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
673
		  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
674
	OUT_BATCH(0);
675
	OUT_BATCH(0);
676
 
677
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
678
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
679
//   OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
680
//                sna->render_state.gen6.general_bo,
681
//                I915_GEM_DOMAIN_INSTRUCTION << 16 |
682
//                I915_GEM_DOMAIN_INSTRUCTION,
683
//                64));
684
 
685
    OUT_BATCH(sna->render_state.gen6.general_bo->gaddr+64);
686
 
687
	OUT_BATCH(0);
688
 
689
	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
690
	OUT_BATCH(0);
691
	OUT_BATCH(limit);
692
	OUT_BATCH(offset);
693
 
694
	sna->render_state.gen6.drawrect_offset = offset;
695
	sna->render_state.gen6.drawrect_limit = limit;
696
	return true;
697
}
698
 
699
static void
700
gen6_emit_vertex_elements(struct sna *sna,
701
			  const struct sna_composite_op *op)
702
{
703
	/*
704
	 * vertex data in vertex buffer
705
	 *    position: (x, y)
706
	 *    texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
707
	 *    texture coordinate 1 if (has_mask is TRUE): same as above
708
	 */
709
	struct gen6_render_state *render = &sna->render_state.gen6;
710
	int nelem = op->mask.bo ? 2 : 1;
711
	int selem = op->is_affine ? 2 : 3;
712
	uint32_t w_component;
713
	uint32_t src_format;
714
	int id = op->u.gen6.ve_id;
715
 
716
	if (render->ve_id == id)
717
		return;
718
	render->ve_id = id;
719
 
720
	if (op->is_affine) {
721
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
722
		w_component = GEN6_VFCOMPONENT_STORE_1_FLT;
723
	} else {
724
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
725
		w_component = GEN6_VFCOMPONENT_STORE_SRC;
726
	}
727
 
728
	/* The VUE layout
729
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
730
	 *    dword 4-7: position (x, y, 1.0, 1.0),
731
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
732
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
733
	 *
734
	 * dword 4-15 are fetched from vertex buffer
735
	 */
736
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
737
		((2 * (2 + nelem)) + 1 - 2));
738
 
739
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
740
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
741
 
742
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
743
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
744
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
745
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
746
 
747
	/* x,y */
748
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
749
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
750
 
751
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
752
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
753
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
754
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
755
 
756
	/* u0, v0, w0 */
757
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
758
		  src_format << VE0_FORMAT_SHIFT |
759
		  4 << VE0_OFFSET_SHIFT);	/* offset vb in bytes */
760
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
761
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
762
		  w_component << VE1_VFCOMPONENT_2_SHIFT |
763
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
764
 
765
	/* u1, v1, w1 */
766
	if (op->mask.bo) {
767
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
768
			  src_format << VE0_FORMAT_SHIFT |
769
			  ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
770
		OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
771
			  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
772
			  w_component << VE1_VFCOMPONENT_2_SHIFT |
773
			  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
774
	}
775
}
776
 
777
static void
778
gen6_emit_flush(struct sna *sna)
779
{
780
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
781
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
782
		  GEN6_PIPE_CONTROL_TC_FLUSH |
783
		  GEN6_PIPE_CONTROL_CS_STALL);
784
	OUT_BATCH(0);
785
	OUT_BATCH(0);
786
}
787
 
788
static void
789
gen6_emit_state(struct sna *sna,
790
		const struct sna_composite_op *op,
791
		uint16_t wm_binding_table)
792
{
793
	bool need_stall = wm_binding_table & 1;
794
 
795
	if (gen6_emit_cc(sna, op->op, op->has_component_alpha, op->dst.format))
796
		need_stall = false;
797
	gen6_emit_sampler(sna,
798
			  SAMPLER_OFFSET(op->src.filter,
799
					 op->src.repeat,
800
					 op->mask.filter,
801
					 op->mask.repeat));
802
	gen6_emit_sf(sna, op->mask.bo != NULL);
803
	gen6_emit_wm(sna,
804
		     op->u.gen6.wm_kernel,
805
		     op->u.gen6.nr_surfaces,
806
		     op->u.gen6.nr_inputs);
807
	gen6_emit_vertex_elements(sna, op);
808
	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
809
	if (gen6_emit_drawing_rectangle(sna, op))
810
		need_stall = false;
811
//    if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
812
        gen6_emit_flush(sna);
813
        kgem_clear_dirty(&sna->kgem);
814
		kgem_bo_mark_dirty(op->dst.bo);
815
		need_stall = false;
816
//   }
817
	if (need_stall) {
818
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
819
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
820
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
821
		OUT_BATCH(0);
822
		OUT_BATCH(0);
823
	}
824
}
825
 
826
static void gen6_magic_ca_pass(struct sna *sna,
827
			       const struct sna_composite_op *op)
828
{
829
	struct gen6_render_state *state = &sna->render_state.gen6;
830
 
831
	if (!op->need_magic_ca_pass)
832
		return;
833
 
834
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
835
	     sna->render.vertex_start, sna->render.vertex_index));
836
 
837
	gen6_emit_flush(sna);
838
 
839
	gen6_emit_cc(sna, PictOpAdd, TRUE, op->dst.format);
840
	gen6_emit_wm(sna,
841
		     gen6_choose_composite_kernel(PictOpAdd,
842
						  TRUE, TRUE,
843
						  op->is_affine),
844
		     3, 2);
845
 
846
	OUT_BATCH(GEN6_3DPRIMITIVE |
847
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
848
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
849
 
850
		  4);
851
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
852
	OUT_BATCH(sna->render.vertex_start);
853
	OUT_BATCH(1);	/* single instance */
854
	OUT_BATCH(0);	/* start instance location */
855
	OUT_BATCH(0);	/* index buffer offset, ignored */
856
 
857
	state->last_primitive = sna->kgem.nbatch;
858
}
859
 
860
static void gen6_vertex_flush(struct sna *sna)
861
{
862
	assert(sna->render_state.gen6.vertex_offset);
863
 
864
	DBG(("%s[%x] = %d\n", __FUNCTION__,
865
	     4*sna->render_state.gen6.vertex_offset,
866
	     sna->render.vertex_index - sna->render.vertex_start));
867
	sna->kgem.batch[sna->render_state.gen6.vertex_offset] =
868
		sna->render.vertex_index - sna->render.vertex_start;
869
	sna->render_state.gen6.vertex_offset = 0;
870
}
871
 
872
static int gen6_vertex_finish(struct sna *sna)
873
{
874
	struct kgem_bo *bo;
875
	unsigned int i;
876
 
877
	DBG(("%s: used=%d / %d\n", __FUNCTION__,
878
	     sna->render.vertex_used, sna->render.vertex_size));
879
	assert(sna->render.vertex_used);
880
 
881
	/* Note: we only need dword alignment (currently) */
882
/*
883
	bo = sna->render.vbo;
884
	if (bo) {
885
		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
886
			if (sna->render.vertex_reloc[i]) {
887
				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
888
				     i, sna->render.vertex_reloc[i]));
889
 
890
				sna->kgem.batch[sna->render.vertex_reloc[i]] =
891
					kgem_add_reloc(&sna->kgem,
892
						       sna->render.vertex_reloc[i],
893
						       bo,
894
						       I915_GEM_DOMAIN_VERTEX << 16,
895
						       0);
896
				sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
897
					kgem_add_reloc(&sna->kgem,
898
						       sna->render.vertex_reloc[i]+1,
899
						       bo,
900
						       I915_GEM_DOMAIN_VERTEX << 16,
901
 
902
				sna->render.vertex_reloc[i] = 0;
903
			}
904
		}
905
 
906
		sna->render.vertex_used = 0;
907
		sna->render.vertex_index = 0;
908
		sna->render_state.gen6.vb_id = 0;
909
 
910
		kgem_bo_destroy(&sna->kgem, bo);
911
	}
912
*/
913
	sna->render.vertices = NULL;
914
	sna->render.vbo = kgem_create_linear(&sna->kgem, 256*1024);
915
	if (sna->render.vbo)
916
		sna->render.vertices = kgem_bo_map__cpu(&sna->kgem, sna->render.vbo);
917
	if (sna->render.vertices == NULL) {
918
		kgem_bo_destroy(&sna->kgem, sna->render.vbo);
919
		sna->render.vbo = NULL;
920
		return 0;
921
	}
922
 
923
//   kgem_bo_sync__cpu(&sna->kgem, sna->render.vbo);
924
	if (sna->render.vertex_used) {
925
		DBG(("%s: copying initial buffer x %d to handle=%d\n",
926
		     __FUNCTION__,
927
		     sna->render.vertex_used,
928
		     sna->render.vbo->handle));
929
		memcpy(sna->render.vertices,
930
		       sna->render.vertex_data,
931
		       sizeof(float)*sna->render.vertex_used);
932
	}
933
	sna->render.vertex_size = 64 * 1024 - 1;
934
	return sna->render.vertex_size - sna->render.vertex_used;
935
}
936
 
937
static void gen6_vertex_close(struct sna *sna)
938
{
939
	struct kgem_bo *bo;
940
	unsigned int i, delta = 0;
941
 
942
	if (!sna->render.vertex_used) {
943
		assert(sna->render.vbo == NULL);
944
		assert(sna->render.vertices == sna->render.vertex_data);
945
		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
946
		return;
947
	}
948
 
949
	DBG(("%s: used=%d / %d\n", __FUNCTION__,
950
	     sna->render.vertex_used, sna->render.vertex_size));
951
 
952
	bo = sna->render.vbo;
953
	if (bo == NULL) {
954
		assert(sna->render.vertices == sna->render.vertex_data);
955
		assert(sna->render.vertex_used < ARRAY_SIZE(sna->render.vertex_data));
956
		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
957
			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
958
			     sna->render.vertex_used, sna->kgem.nbatch));
959
			memcpy(sna->kgem.batch + sna->kgem.nbatch,
960
			       sna->render.vertex_data,
961
			       sna->render.vertex_used * 4);
962
			delta = sna->kgem.nbatch * 4;
963
			bo = NULL;
964
			sna->kgem.nbatch += sna->render.vertex_used;
965
		} else {
966
			bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
967
			if (bo && !kgem_bo_write(&sna->kgem, bo,
968
						 sna->render.vertex_data,
969
						 4*sna->render.vertex_used)) {
970
				kgem_bo_destroy(&sna->kgem, bo);
971
				goto reset;
972
			}
973
			DBG(("%s: new vbo: %d\n", __FUNCTION__,
974
			     sna->render.vertex_used));
975
		}
976
	}
977
 
978
	for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
979
		if (sna->render.vertex_reloc[i]) {
980
			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
981
			     i, sna->render.vertex_reloc[i]));
982
 
983
//           sna->kgem.batch[sna->render.vertex_reloc[i]] =
984
//               kgem_add_reloc(&sna->kgem,
985
//                          sna->render.vertex_reloc[i],
986
//                          bo,
987
//                          I915_GEM_DOMAIN_VERTEX << 16,
988
//                          delta);
989
            sna->kgem.batch[sna->render.vertex_reloc[i]] =
990
                            sna->kgem.batch_obj->gtt_offset+delta+
991
                            sna->kgem.batch_idx*4096;
992
 
993
//           sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
994
//               kgem_add_reloc(&sna->kgem,
995
//                          sna->render.vertex_reloc[i]+1,
996
//                          bo,
997
//                          I915_GEM_DOMAIN_VERTEX << 16,
998
//                          delta + sna->render.vertex_used * 4 - 1);
999
 
1000
            sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
1001
                            sna->kgem.batch_obj->gtt_offset+delta+
1002
                            sna->kgem.batch_idx*4096+
1003
                            sna->render.vertex_used * 4 - 1;
1004
 
1005
			sna->render.vertex_reloc[i] = 0;
1006
		}
1007
	}
1008
 
1009
//   if (bo)
1010
//       kgem_bo_destroy(&sna->kgem, bo);
1011
 
1012
reset:
1013
	sna->render.vertex_used = 0;
1014
	sna->render.vertex_index = 0;
1015
	sna->render_state.gen6.vb_id = 0;
1016
 
1017
	sna->render.vbo = NULL;
1018
	sna->render.vertices = sna->render.vertex_data;
1019
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1020
}
1021
 
1022
typedef struct gen6_surface_state_padded {
1023
	struct gen6_surface_state state;
1024
	char pad[32 - sizeof(struct gen6_surface_state)];
1025
} gen6_surface_state_padded;
1026
 
1027
/*
 * Reserve a block in the static state stream.  Per the original note it is
 * a bunch of zeros useful for legacy border color and depth-stencil.
 */
static void null_create(struct sna_static_stream *stream)
{
	/* Both trailing arguments are 64 (presumably size and alignment --
	 * confirm against sna_static_stream_map). */
	enum { NULL_BLOCK = 64 };

	sna_static_stream_map(stream, NULL_BLOCK, NULL_BLOCK);
}
1032
 
1033
/*
 * Reserve 64 bytes of scratch space in the static state stream for random
 * writes, such as the pipe-control workaround.
 */
static void scratch_create(struct sna_static_stream *stream)
{
	/* Both trailing arguments are 64 (presumably size and alignment --
	 * confirm against sna_static_stream_map). */
	enum { SCRATCH_BLOCK = 64 };

	sna_static_stream_map(stream, SCRATCH_BLOCK, SCRATCH_BLOCK);
}
1040
 
1041
static void
1042
sampler_state_init(struct gen6_sampler_state *sampler_state,
1043
		   sampler_filter_t filter,
1044
		   sampler_extend_t extend)
1045
{
1046
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1047
 
1048
	/* We use the legacy mode to get the semantics specified by
1049
	 * the Render extension. */
1050
	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
1051
 
1052
	switch (filter) {
1053
	default:
1054
	case SAMPLER_FILTER_NEAREST:
1055
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
1056
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
1057
		break;
1058
	case SAMPLER_FILTER_BILINEAR:
1059
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
1060
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
1061
		break;
1062
	}
1063
 
1064
	switch (extend) {
1065
	default:
1066
	case SAMPLER_EXTEND_NONE:
1067
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
1068
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
1069
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
1070
		break;
1071
	case SAMPLER_EXTEND_REPEAT:
1072
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
1073
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
1074
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
1075
		break;
1076
	case SAMPLER_EXTEND_PAD:
1077
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1078
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1079
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1080
		break;
1081
	case SAMPLER_EXTEND_REFLECT:
1082
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1083
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1084
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1085
		break;
1086
	}
1087
}
1088
 
1089
static uint32_t gen6_create_cc_viewport(struct sna_static_stream *stream)
1090
{
1091
	struct gen6_cc_viewport vp;
1092
 
1093
	vp.min_depth = -1.e35;
1094
	vp.max_depth = 1.e35;
1095
 
1096
	return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
1097
}
1098
 
1099
#if 0

/*
 * Disabled in this port.  Scans gen6_tex_formats for the entry whose
 * pict_fmt equals @format and returns its card_fmt; returns -1 (converted
 * to uint32_t) when no entry matches.
 */
static uint32_t gen6_get_card_format(PictFormat format)
{
	unsigned int n = 0;

	while (n < ARRAY_SIZE(gen6_tex_formats)) {
		if (gen6_tex_formats[n].pict_fmt == format)
			return gen6_tex_formats[n].card_fmt;
		n++;
	}

	return -1;
}
#endif
1112
 
1113
/*
 * Surface-state tiling bits for a tiling mode.
 *
 * This port always reports 0 (no tiling bits) and ignores @tiling.  The
 * mapping that is disabled here was:
 *   I915_TILING_NONE -> 0
 *   I915_TILING_X    -> GEN6_SURFACE_TILED
 *   I915_TILING_Y    -> GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y
 * with any other value tripping an assert.
 */
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
	(void)tiling;

	return 0;
}
1126
 
1127
/**
1128
 * Sets up the common fields for a surface state buffer for the given
1129
 * picture in the given surface state buffer.
1130
 */
1131
static int
1132
gen6_bind_bo(struct sna *sna,
1133
         struct kgem_bo *bo,
1134
	     uint32_t width,
1135
	     uint32_t height,
1136
	     uint32_t format,
1137
	     Bool is_dst)
1138
{
1139
	uint32_t *ss;
1140
	uint32_t domains;
1141
	uint16_t offset;
1142
 
1143
	/* After the first bind, we manage the cache domains within the batch */
1144
	if (is_dst) {
1145
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1146
//       kgem_bo_mark_dirty(bo);
1147
	} else
1148
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1149
 
1150
//   offset = kgem_bo_get_binding(bo, format);
1151
//   if (offset) {
1152
//        DBG(("[%x]  bo(handle=%x), format=%d, reuse %s binding\n",
1153
//            offset, bo->handle, format,
1154
//            domains & 0xffff ? "render" : "sampler"));
1155
//       return offset;
1156
//   }
1157
 
1158
	offset = sna->kgem.surface - sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1159
	offset *= sizeof(uint32_t);
1160
 
1161
	sna->kgem.surface -=
1162
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1163
	ss = sna->kgem.batch + sna->kgem.surface;
1164
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
1165
		 GEN6_SURFACE_BLEND_ENABLED |
1166
		 format << GEN6_SURFACE_FORMAT_SHIFT);
1167
    ss[1] = bo->gaddr;
1168
	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
1169
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
1170
	assert(bo->pitch <= (1 << 18));
1171
    ss[3] = (gen6_tiling_bits(0) |
1172
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
1173
	ss[4] = 0;
1174
	ss[5] = 0;
1175
 
1176
//   kgem_bo_set_binding(bo, format, offset);
1177
 
1178
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1179
	     offset, bo->handle, ss[1],
1180
	     format, width, height, bo->pitch, bo->tiling,
1181
	     domains & 0xffff ? "render" : "sampler"));
1182
 
1183
	return offset;
1184
}
1185
 
1186
 
1187
static void gen6_emit_vertex_buffer(struct sna *sna,
1188
				    const struct sna_composite_op *op)
1189
{
1190
	int id = op->u.gen6.ve_id;
1191
 
1192
	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
1193
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
1194
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
1195
	sna->render.vertex_reloc[id] = sna->kgem.nbatch;
1196
	OUT_BATCH(0);
1197
	OUT_BATCH(0);
1198
	OUT_BATCH(0);
1199
 
1200
	sna->render_state.gen6.vb_id |= 1 << id;
1201
}
1202
 
1203
static void gen6_emit_primitive(struct sna *sna)
1204
{
1205
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
1206
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
1207
		     __FUNCTION__,
1208
		     sna->render.vertex_start,
1209
		     sna->render.vertex_index));
1210
		sna->render_state.gen6.vertex_offset = sna->kgem.nbatch - 5;
1211
		return;
1212
	}
1213
 
1214
	OUT_BATCH(GEN6_3DPRIMITIVE |
1215
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
1216
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
1217
 
1218
		  4);
1219
	sna->render_state.gen6.vertex_offset = sna->kgem.nbatch;
1220
	OUT_BATCH(0);	/* vertex count, to be filled in later */
1221
	OUT_BATCH(sna->render.vertex_index);
1222
	OUT_BATCH(1);	/* single instance */
1223
	OUT_BATCH(0);	/* start instance location */
1224
	OUT_BATCH(0);	/* index buffer offset, ignored */
1225
	sna->render.vertex_start = sna->render.vertex_index;
1226
	DBG(("%s: started new primitive: index=%d\n",
1227
	     __FUNCTION__, sna->render.vertex_start));
1228
 
1229
	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
1230
}
1231
 
1232
static bool gen6_rectangle_begin(struct sna *sna,
1233
				 const struct sna_composite_op *op)
1234
{
1235
	int id = 1 << op->u.gen6.ve_id;
1236
	int ndwords;
1237
 
1238
	ndwords = op->need_magic_ca_pass ? 60 : 6;
1239
	if ((sna->render_state.gen6.vb_id & id) == 0)
1240
		ndwords += 5;
1241
	if (!kgem_check_batch(&sna->kgem, ndwords))
1242
		return false;
1243
 
1244
	if ((sna->render_state.gen6.vb_id & id) == 0)
1245
		gen6_emit_vertex_buffer(sna, op);
1246
 
1247
	gen6_emit_primitive(sna);
1248
	return true;
1249
}
1250
 
1251
static int gen6_get_rectangles__flush(struct sna *sna,
1252
				      const struct sna_composite_op *op)
1253
{
1254
	if (sna->render_state.gen6.vertex_offset) {
1255
		gen6_vertex_flush(sna);
1256
		gen6_magic_ca_pass(sna, op);
1257
	}
1258
 
1259
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
1260
		return 0;
1261
	if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1)
1262
		return 0;
1263
	if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 2)
1264
		return 0;
1265
 
1266
	return gen6_vertex_finish(sna);
1267
}
1268
 
1269
inline static int gen6_get_rectangles(struct sna *sna,
1270
				      const struct sna_composite_op *op,
1271
				      int want)
1272
{
1273
	int rem = vertex_space(sna);
1274
 
1275
	if (rem < op->floats_per_rect) {
1276
		DBG(("flushing vbo for %s: %d < %d\n",
1277
		     __FUNCTION__, rem, op->floats_per_rect));
1278
		rem = gen6_get_rectangles__flush(sna, op);
1279
		if (rem == 0)
1280
			return 0;
1281
	}
1282
 
1283
	if (sna->render_state.gen6.vertex_offset == 0 &&
1284
	    !gen6_rectangle_begin(sna, op))
1285
		return 0;
1286
 
1287
	if (want > 1 && want * op->floats_per_rect > rem)
1288
		want = rem / op->floats_per_rect;
1289
 
1290
	assert(want > 0);
1291
	sna->render.vertex_index += 3*want;
1292
	return want;
1293
}
1294
 
1295
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
1296
							 uint16_t *offset)
1297
{
1298
	uint32_t *table;
1299
 
1300
	sna->kgem.surface -=
1301
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1302
	/* Clear all surplus entries to zero in case of prefetch */
1303
	table = memset(sna->kgem.batch + sna->kgem.surface,
1304
		       0, sizeof(struct gen6_surface_state_padded));
1305
 
1306
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1307
 
1308
	*offset = sna->kgem.surface;
1309
	return table;
1310
}
1311
 
1312
static uint32_t
1313
gen6_choose_composite_vertex_buffer(const struct sna_composite_op *op)
1314
{
1315
	int has_mask = op->mask.bo != NULL;
1316
	int is_affine = op->is_affine;
1317
	return has_mask << 1 | is_affine;
1318
}
1319
 
1320
static void
1321
gen6_get_batch(struct sna *sna)
1322
{
1323
	kgem_set_mode(&sna->kgem, KGEM_RENDER);
1324
/*
1325
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
1326
		DBG(("%s: flushing batch: %d < %d+%d\n",
1327
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1328
		     150, 4*8));
1329
		kgem_submit(&sna->kgem);
1330
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1331
	}
1332
*/
1333
 
1334
	if (sna->render_state.gen6.needs_invariant)
1335
		gen6_emit_invariant(sna);
1336
}
1337
 
1338
 
1339
static void
1340
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1341
{
1342
	assert (sna->render_state.gen6.vertex_offset == 0);
1343
	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
1344
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
1345
			/* XXX propagate failure */
1346
			gen6_vertex_finish(sna);
1347
 
1348
		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
1349
		     sna->render_state.gen6.floats_per_vertex,
1350
		     op->floats_per_vertex,
1351
		     sna->render.vertex_index,
1352
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
1353
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
1354
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
1355
		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
1356
	}
1357
}
1358
 
1359
 
1360
/* Larger of two values.  Each argument may be evaluated twice, so avoid
 * passing expressions with side effects. */
#if !defined(MAX)
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
1363
 
1364
static uint32_t
1365
gen6_composite_create_blend_state(struct sna_static_stream *stream)
1366
{
1367
	char *base, *ptr;
1368
	int src, dst;
1369
 
1370
	base = sna_static_stream_map(stream,
1371
				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
1372
				     64);
1373
 
1374
	ptr = base;
1375
	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
1376
		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
1377
			struct gen6_blend_state *blend =
1378
				(struct gen6_blend_state *)ptr;
1379
 
1380
			blend->blend0.dest_blend_factor = dst;
1381
			blend->blend0.source_blend_factor = src;
1382
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
1383
			blend->blend0.blend_enable =
1384
				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
1385
 
1386
			blend->blend1.post_blend_clamp_enable = 1;
1387
			blend->blend1.pre_blend_clamp_enable = 1;
1388
 
1389
			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
1390
		}
1391
	}
1392
 
1393
	return sna_static_stream_offsetof(stream, base);
1394
}
1395
 
1396
#if 0
1397
static uint32_t gen6_bind_video_source(struct sna *sna,
1398
				       struct kgem_bo *src_bo,
1399
				       uint32_t src_offset,
1400
				       int src_width,
1401
				       int src_height,
1402
				       int src_pitch,
1403
				       uint32_t src_surf_format)
1404
{
1405
	struct gen6_surface_state *ss;
1406
 
1407
	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1408
 
1409
	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
1410
	ss->ss0.surface_type = GEN6_SURFACE_2D;
1411
	ss->ss0.surface_format = src_surf_format;
1412
 
1413
	ss->ss1.base_addr =
1414
		kgem_add_reloc(&sna->kgem,
1415
			       sna->kgem.surface + 1,
1416
			       src_bo,
1417
			       I915_GEM_DOMAIN_SAMPLER << 16,
1418
			       src_offset);
1419
 
1420
	ss->ss2.width  = src_width - 1;
1421
	ss->ss2.height = src_height - 1;
1422
	ss->ss3.pitch  = src_pitch - 1;
1423
 
1424
	return sna->kgem.surface * sizeof(uint32_t);
1425
}
1426
 
1427
static void gen6_emit_video_state(struct sna *sna,
1428
				  struct sna_composite_op *op,
1429
				  struct sna_video_frame *frame)
1430
{
1431
	uint32_t src_surf_format;
1432
	uint32_t src_surf_base[6];
1433
	int src_width[6];
1434
	int src_height[6];
1435
	int src_pitch[6];
1436
	uint32_t *binding_table;
1437
	uint16_t offset;
1438
	bool dirty;
1439
	int n_src, n;
1440
 
1441
	gen6_get_batch(sna);
1442
	dirty = kgem_bo_is_dirty(op->dst.bo);
1443
 
1444
	src_surf_base[0] = 0;
1445
	src_surf_base[1] = 0;
1446
	src_surf_base[2] = frame->VBufOffset;
1447
	src_surf_base[3] = frame->VBufOffset;
1448
	src_surf_base[4] = frame->UBufOffset;
1449
	src_surf_base[5] = frame->UBufOffset;
1450
 
1451
	if (is_planar_fourcc(frame->id)) {
1452
		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
1453
		src_width[1]  = src_width[0]  = frame->width;
1454
		src_height[1] = src_height[0] = frame->height;
1455
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1456
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1457
			frame->width / 2;
1458
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1459
			frame->height / 2;
1460
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1461
			frame->pitch[0];
1462
		n_src = 6;
1463
	} else {
1464
		if (frame->id == FOURCC_UYVY)
1465
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
1466
		else
1467
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;
1468
 
1469
		src_width[0]  = frame->width;
1470
		src_height[0] = frame->height;
1471
		src_pitch[0]  = frame->pitch[0];
1472
		n_src = 1;
1473
	}
1474
 
1475
	binding_table = gen6_composite_get_binding_table(sna, &offset);
1476
 
1477
	binding_table[0] =
1478
		gen6_bind_bo(sna,
1479
			     op->dst.bo, op->dst.width, op->dst.height,
1480
			     gen6_get_dest_format(op->dst.format),
1481
			     TRUE);
1482
	for (n = 0; n < n_src; n++) {
1483
		binding_table[1+n] =
1484
			gen6_bind_video_source(sna,
1485
					       frame->bo,
1486
					       src_surf_base[n],
1487
					       src_width[n],
1488
					       src_height[n],
1489
					       src_pitch[n],
1490
					       src_surf_format);
1491
	}
1492
 
1493
	gen6_emit_state(sna, op, offset | dirty);
1494
}
1495
 
1496
static Bool
1497
gen6_render_video(struct sna *sna,
1498
		  struct sna_video *video,
1499
		  struct sna_video_frame *frame,
1500
		  RegionPtr dstRegion,
1501
		  short src_w, short src_h,
1502
		  short drw_w, short drw_h,
1503
		  PixmapPtr pixmap)
1504
{
1505
	struct sna_composite_op tmp;
1506
	int nbox, dxo, dyo, pix_xoff, pix_yoff;
1507
	float src_scale_x, src_scale_y;
1508
	struct sna_pixmap *priv;
1509
	BoxPtr box;
1510
 
1511
	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
1512
	     __FUNCTION__, src_w, src_h, drw_w, drw_h,
1513
	     REGION_NUM_RECTS(dstRegion),
1514
	     REGION_EXTENTS(NULL, dstRegion)->x1,
1515
	     REGION_EXTENTS(NULL, dstRegion)->y1,
1516
	     REGION_EXTENTS(NULL, dstRegion)->x2,
1517
	     REGION_EXTENTS(NULL, dstRegion)->y2));
1518
 
1519
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1520
	if (priv == NULL)
1521
		return FALSE;
1522
 
1523
	memset(&tmp, 0, sizeof(tmp));
1524
 
1525
	tmp.op = PictOpSrc;
1526
	tmp.dst.pixmap = pixmap;
1527
	tmp.dst.width  = pixmap->drawable.width;
1528
	tmp.dst.height = pixmap->drawable.height;
1529
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
1530
	tmp.dst.bo = priv->gpu_bo;
1531
 
1532
	tmp.src.bo = frame->bo;
1533
	tmp.src.filter = SAMPLER_FILTER_BILINEAR;
1534
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
1535
 
1536
	tmp.mask.bo = NULL;
1537
 
1538
	tmp.is_affine = TRUE;
1539
	tmp.floats_per_vertex = 3;
1540
	tmp.floats_per_rect = 9;
1541
 
1542
	if (is_planar_fourcc(frame->id)) {
1543
		tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_VIDEO_PLANAR;
1544
		tmp.u.gen6.nr_surfaces = 7;
1545
	} else {
1546
		tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_VIDEO_PACKED;
1547
		tmp.u.gen6.nr_surfaces = 2;
1548
	}
1549
	tmp.u.gen6.nr_inputs = 1;
1550
	tmp.u.gen6.ve_id = 1;
1551
 
1552
	kgem_set_mode(&sna->kgem, KGEM_RENDER);
1553
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1554
		kgem_submit(&sna->kgem);
1555
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
1556
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1557
	}
1558
 
1559
	gen6_emit_video_state(sna, &tmp, frame);
1560
	gen6_align_vertex(sna, &tmp);
1561
 
1562
	/* Set up the offset for translating from the given region (in screen
1563
	 * coordinates) to the backing pixmap.
1564
	 */
1565
#ifdef COMPOSITE
1566
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1567
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1568
#else
1569
	pix_xoff = 0;
1570
	pix_yoff = 0;
1571
#endif
1572
 
1573
	dxo = dstRegion->extents.x1;
1574
	dyo = dstRegion->extents.y1;
1575
 
1576
	/* Use normalized texture coordinates */
1577
	src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
1578
	src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
1579
 
1580
	box = REGION_RECTS(dstRegion);
1581
	nbox = REGION_NUM_RECTS(dstRegion);
1582
	while (nbox--) {
1583
		BoxRec r;
1584
 
1585
		r.x1 = box->x1 + pix_xoff;
1586
		r.x2 = box->x2 + pix_xoff;
1587
		r.y1 = box->y1 + pix_yoff;
1588
		r.y2 = box->y2 + pix_yoff;
1589
 
1590
		if (unlikely(!gen6_get_rectangles(sna, &tmp, 1))) {
1591
			_kgem_submit(&sna->kgem);
1592
			gen6_emit_video_state(sna, &tmp, frame);
1593
			gen6_get_rectangles(sna, &tmp, 1);
1594
		}
1595
 
1596
		OUT_VERTEX(r.x2, r.y2);
1597
		OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
1598
		OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
1599
 
1600
		OUT_VERTEX(r.x1, r.y2);
1601
		OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
1602
		OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
1603
 
1604
		OUT_VERTEX(r.x1, r.y1);
1605
		OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
1606
		OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
1607
 
1608
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1609
			sna_damage_add_box(&priv->gpu_damage, &r);
1610
			sna_damage_subtract_box(&priv->cpu_damage, &r);
1611
		}
1612
		box++;
1613
	}
1614
	priv->clear = false;
1615
 
1616
	gen6_vertex_flush(sna);
1617
	return TRUE;
1618
}
1619
 
1620
#endif
1621
 
1622
static void gen6_render_composite_done(struct sna *sna,
1623
				       const struct sna_composite_op *op)
1624
{
1625
	DBG(("%s\n", __FUNCTION__));
1626
 
1627
	if (sna->render_state.gen6.vertex_offset) {
1628
		gen6_vertex_flush(sna);
1629
		gen6_magic_ca_pass(sna, op);
1630
	}
1631
 
1632
//   if (op->mask.bo)
1633
//       kgem_bo_destroy(&sna->kgem, op->mask.bo);
1634
//   if (op->src.bo)
1635
//       kgem_bo_destroy(&sna->kgem, op->src.bo);
1636
 
1637
//   sna_render_composite_redirect_done(sna, op);
1638
}
1639
 
1640
 
1641
 
1642
static void
1643
gen6_emit_copy_state(struct sna *sna,
1644
		     const struct sna_composite_op *op)
1645
{
1646
	uint32_t *binding_table;
1647
	uint16_t offset;
1648
	bool dirty;
1649
 
1650
	gen6_get_batch(sna);
1651
//   dirty = kgem_bo_is_dirty(op->dst.bo);
1652
 
1653
	binding_table = gen6_composite_get_binding_table(sna, &offset);
1654
 
1655
	binding_table[0] =
1656
		gen6_bind_bo(sna,
1657
			     op->dst.bo, op->dst.width, op->dst.height,
1658
                 GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
1659
			     TRUE);
1660
	binding_table[1] =
1661
		gen6_bind_bo(sna,
1662
			     op->src.bo, op->src.width, op->src.height,
1663
                 GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
1664
			     FALSE);
1665
 
1666
	if (sna->kgem.surface == offset &&
1667
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
1668
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1669
		offset = sna->render_state.gen6.surface_table;
1670
	}
1671
 
1672
	gen6_emit_state(sna, op, offset | dirty);
1673
}
1674
 
1675
 
1676
static void
1677
gen6_render_copy_blt(struct sna *sna,
1678
             const struct sna_composite_op *op,
1679
		     int16_t sx, int16_t sy,
1680
		     int16_t w,  int16_t h,
1681
		     int16_t dx, int16_t dy)
1682
{
1683
    if (unlikely(!gen6_get_rectangles(sna, op, 1))) {
1684
		_kgem_submit(&sna->kgem);
1685
        gen6_emit_copy_state(sna, op);
1686
        gen6_get_rectangles(sna, op, 1);
1687
	}
1688
 
1689
	OUT_VERTEX(dx+w, dy+h);
1690
    OUT_VERTEX_F((sx+w)*op->src.scale[0]);
1691
    OUT_VERTEX_F((sy+h)*op->src.scale[1]);
1692
 
1693
	OUT_VERTEX(dx, dy+h);
1694
    OUT_VERTEX_F(sx*op->src.scale[0]);
1695
    OUT_VERTEX_F((sy+h)*op->src.scale[1]);
1696
 
1697
	OUT_VERTEX(dx, dy);
1698
    OUT_VERTEX_F(sx*op->src.scale[0]);
1699
    OUT_VERTEX_F(sy*op->src.scale[1]);
1700
}
1701
 
1702
static void
1703
gen6_render_copy_done(struct sna *sna)
1704
{
1705
	DBG(("%s()\n", __FUNCTION__));
1706
 
1707
	if (sna->render_state.gen6.vertex_offset)
1708
		gen6_vertex_flush(sna);
1709
}
1710
 
1711
static Bool
1712
gen6_render_copy(struct sna *sna, uint8_t alu,
1713
         bitmap_t *src, struct kgem_bo *src_bo,
1714
         bitmap_t *dst, struct kgem_bo *dst_bo,
1715
         int dst_x, int dst_y, int src_x, int src_y, int w, int h)
1716
{
1717
    struct sna_composite_op op;
1718
 
1719
    memset(&op, 0, sizeof(op));
1720
 
1721
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
1722
	     __FUNCTION__, alu,
1723
         src->width, src->height,
1724
         dst->width, dst->height));
1725
 
1726
//    printf("%s %dx%d  src=(%dx%d), dst=(%dx%d)\n",
1727
//         __FUNCTION__,dst_x, dst_y,
1728
//         src->width, src->height,
1729
//         dst->width, dst->height);
1730
 
1731
    op.dst.format = 0;
1732
    op.src.pict_format = 0;
1733
 
1734
    op.op = PictOpSrc;
1735
 
1736
    op.dst.pixmap = dst;
1737
    op.dst.width  = dst->width;
1738
    op.dst.height = dst->height;
1739
    op.dst.bo = dst_bo;
1740
 
1741
    op.src.bo = src_bo;
1742
    op.src.card_format = GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
1743
    op.src.width  = src->width;
1744
    op.src.height = src->height;
1745
 
1746
//    src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
1747
//    src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
1748
 
1749
    op.src.scale[0] = 1.f/w;            //src->width;
1750
    op.src.scale[1] = 1.f/h;            //src->height;
1751
    op.src.filter = SAMPLER_FILTER_BILINEAR;
1752
    op.src.repeat = SAMPLER_EXTEND_NONE;
1753
 
1754
    op.mask.bo = NULL;
1755
 
1756
    op.is_affine = true;
1757
    op.floats_per_vertex = 3;
1758
    op.floats_per_rect = 9;
1759
 
1760
    op.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
1761
    op.u.gen6.nr_surfaces = 2;
1762
    op.u.gen6.nr_inputs = 1;
1763
    op.u.gen6.ve_id = 1;
1764
 
1765
    gen6_emit_copy_state(sna, &op);
1766
    gen6_align_vertex(sna, &op);
1767
 
1768
    gen6_render_copy_blt(sna, &op, src_x, src_y, w, h, dst_x, dst_y);
1769
    gen6_render_copy_done(sna);
1770
 
1771
    _kgem_submit(&sna->kgem);
1772
 
1773
	return TRUE;
1774
}
1775
 
1776
static void
1777
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
1778
{
1779
	uint32_t *binding_table;
1780
	uint16_t offset;
1781
	bool dirty;
1782
 
1783
	gen6_get_batch(sna);
1784
//   dirty = kgem_bo_is_dirty(op->dst.bo);
1785
 
1786
	binding_table = gen6_composite_get_binding_table(sna, &offset);
1787
 
1788
	binding_table[0] =
1789
		gen6_bind_bo(sna,
1790
                 op->dst.bo, 1024, 768,
1791
                 GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
1792
			     TRUE);
1793
	binding_table[1] =
1794
		gen6_bind_bo(sna,
1795
			     op->src.bo, 1, 1,
1796
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
1797
			     FALSE);
1798
 
1799
	if (sna->kgem.surface == offset &&
1800
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
1801
		sna->kgem.surface +=
1802
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
1803
		offset = sna->render_state.gen6.surface_table;
1804
	}
1805
 
1806
	gen6_emit_state(sna, op, offset | dirty);
1807
}
1808
 
1809
 
1810
static Bool
1811
gen6_render_clear(struct sna *sna, bitmap_t *dst, struct kgem_bo *bo)
1812
{
1813
	struct sna_composite_op tmp;
1814
 
1815
 
1816
	DBG(("%s: %dx%d\n",
1817
	     __FUNCTION__,
1818
         dst->width,
1819
         dst->height));
1820
 
1821
	tmp.op = PictOpSrc;
1822
 
1823
	tmp.dst.pixmap = dst;
1824
    tmp.dst.width  = dst->width;
1825
    tmp.dst.height = dst->height;
1826
    tmp.dst.format = 0; //PICT_a8r8g8b8;
1827
	tmp.dst.bo = bo;
1828
	tmp.dst.x = tmp.dst.y = 0;
1829
 
1830
//   tmp.src.bo = sna_render_get_solid(sna, 0);
1831
    tmp.src.bo     = bo;
1832
	tmp.src.filter = SAMPLER_FILTER_NEAREST;
1833
	tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
1834
 
1835
	tmp.mask.bo = NULL;
1836
	tmp.mask.filter = SAMPLER_FILTER_NEAREST;
1837
	tmp.mask.repeat = SAMPLER_EXTEND_NONE;
1838
 
1839
	tmp.is_affine = TRUE;
1840
	tmp.floats_per_vertex = 3;
1841
	tmp.floats_per_rect = 9;
1842
	tmp.has_component_alpha = 0;
1843
	tmp.need_magic_ca_pass = FALSE;
1844
 
1845
	tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
1846
	tmp.u.gen6.nr_surfaces = 2;
1847
	tmp.u.gen6.nr_inputs = 1;
1848
	tmp.u.gen6.ve_id = 1;
1849
 
1850
//   if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
1851
//       _kgem_submit(&sna->kgem);
1852
//       assert(kgem_check_bo(&sna->kgem, bo, NULL));
1853
//   }
1854
 
1855
	gen6_emit_fill_state(sna, &tmp);
1856
	gen6_align_vertex(sna, &tmp);
1857
 
1858
	if (unlikely(!gen6_get_rectangles(sna, &tmp, 1))) {
1859
		_kgem_submit(&sna->kgem);
1860
		gen6_emit_fill_state(sna, &tmp);
1861
		gen6_get_rectangles(sna, &tmp, 1);
1862
	}
1863
 
1864
    OUT_VERTEX(dst->width, dst->height);
1865
	OUT_VERTEX_F(1);
1866
	OUT_VERTEX_F(1);
1867
 
1868
    OUT_VERTEX(0, dst->height);
1869
	OUT_VERTEX_F(0);
1870
	OUT_VERTEX_F(1);
1871
 
1872
	OUT_VERTEX(0, 0);
1873
	OUT_VERTEX_F(0);
1874
	OUT_VERTEX_F(0);
1875
 
1876
    gen6_vertex_flush(sna);
1877
//   kgem_bo_destroy(&sna->kgem, tmp.src.bo);
1878
//    gen6_render_composite_done(sna, &tmp);
1879
    _kgem_submit(&sna->kgem);
1880
 
1881
	return TRUE;
1882
}
1883
 
1884
/* Render flush hook (installed as sna->render.flush): closes the pending
 * vertex data via gen6_vertex_close(). */
static void gen6_render_flush(struct sna *sna)
{
	gen6_vertex_close(sna);
}
1888
 
1889
 
1890
static void
1891
gen6_render_retire(struct kgem *kgem)
1892
{
1893
	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
1894
		kgem->ring = kgem->mode;
1895
}
1896
 
1897
static void gen6_render_reset(struct sna *sna)
1898
{
1899
	sna->render_state.gen6.needs_invariant = TRUE;
1900
	sna->render_state.gen6.vb_id = 0;
1901
	sna->render_state.gen6.ve_id = -1;
1902
	sna->render_state.gen6.last_primitive = -1;
1903
 
1904
	sna->render_state.gen6.num_sf_outputs = 0;
1905
	sna->render_state.gen6.samplers = -1;
1906
	sna->render_state.gen6.blend = -1;
1907
	sna->render_state.gen6.kernel = -1;
1908
	sna->render_state.gen6.drawrect_offset = -1;
1909
	sna->render_state.gen6.drawrect_limit = -1;
1910
	sna->render_state.gen6.surface_table = -1;
1911
}
1912
 
1913
/*
 * Teardown hook; a no-op in this port.  The call that would destroy
 * sna->render_state.gen6.general_bo is disabled.
 */
static void gen6_render_fini(struct sna *sna)
{
	(void)sna;
}
1917
 
1918
static Bool gen6_render_setup(struct sna *sna)
1919
{
1920
	struct gen6_render_state *state = &sna->render_state.gen6;
1921
	struct sna_static_stream general;
1922
	struct gen6_sampler_state *ss;
1923
	int i, j, k, l, m;
1924
 
1925
    sna_static_stream_init(&general);
1926
 
1927
	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
1928
	 * dumps, you know it points to zero.
1929
	 */
1930
    null_create(&general);
1931
    scratch_create(&general);
1932
 
1933
	for (m = 0; m < GEN6_KERNEL_COUNT; m++)
1934
		state->wm_kernel[m] =
1935
			sna_static_stream_add(&general,
1936
					       wm_kernels[m].data,
1937
					       wm_kernels[m].size,
1938
					       64);
1939
 
1940
	ss = sna_static_stream_map(&general,
1941
				   2 * sizeof(*ss) *
1942
				   FILTER_COUNT * EXTEND_COUNT *
1943
				   FILTER_COUNT * EXTEND_COUNT,
1944
				   32);
1945
	state->wm_state = sna_static_stream_offsetof(&general, ss);
1946
	for (i = 0; i < FILTER_COUNT; i++) {
1947
		for (j = 0; j < EXTEND_COUNT; j++) {
1948
			for (k = 0; k < FILTER_COUNT; k++) {
1949
				for (l = 0; l < EXTEND_COUNT; l++) {
1950
					sampler_state_init(ss++, i, j);
1951
					sampler_state_init(ss++, k, l);
1952
				}
1953
			}
1954
		}
1955
	}
1956
 
1957
    state->cc_vp = gen6_create_cc_viewport(&general);
1958
    state->cc_blend = gen6_composite_create_blend_state(&general);
1959
 
1960
    state->general_bo = sna_static_stream_fini(sna, &general);
1961
    return state->general_bo != NULL;
1962
}
1963
 
1964
Bool gen6_render_init(struct sna *sna)
1965
{
1966
    if (!gen6_render_setup(sna))
1967
        return FALSE;
1968
 
1969
//    sna->kgem.context_switch = gen6_render_context_switch;
1970
      sna->kgem.retire = gen6_render_retire;
1971
 
1972
//    sna->render.composite = gen6_render_composite;
1973
//    sna->render.video = gen6_render_video;
1974
 
1975
//    sna->render.copy_boxes = gen6_render_copy_boxes;
1976
    sna->render.copy = gen6_render_copy;
1977
 
1978
//    sna->render.fill_boxes = gen6_render_fill_boxes;
1979
//    sna->render.fill = gen6_render_fill;
1980
//    sna->render.fill_one = gen6_render_fill_one;
1981
    sna->render.clear = gen6_render_clear;
1982
 
1983
    sna->render.flush = gen6_render_flush;
1984
    sna->render.reset = gen6_render_reset;
1985
//    sna->render.fini = gen6_render_fini;
1986
 
1987
    sna->render.max_3d_size = GEN6_MAX_SIZE;
1988
    sna->render.max_3d_pitch = 1 << 18;
1989
    return TRUE;
1990
}