/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu
 *    Eric Anholt
 *    Carl Worth
 *    Keith Packard
 *    Chris Wilson
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders that we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
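/* The hardware field counts 16-register GRF blocks, minus one: e.g.
 * GEN4_GRF_BLOCKS(16) == 0 and GEN4_GRF_BLOCKS(32) == 1.
 */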
 
/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES        0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES        32

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES        0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES        64
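/* With these values gen4_emit_urb() computes the fences as
 * VS: rows [0,32), GS and CLIP: empty at row 32, SF: rows [32,160)
 * (64 entries * 2 rows each), CS: empty at row 160.
 */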
 
/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
    [kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
    const void *data;
    unsigned int size;
    bool has_mask;
} wm_kernels[] = {
    NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
    NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

    NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
    NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

    NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
    NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

    NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
    NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

    NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
    NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

    KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
    KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL
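/* NOKERNEL entries carry size == 0, so gen4_render_setup() assembles them at
 * runtime from the brw_wm_kernel__* sources; KERNEL entries are pre-built
 * .g4b blobs (the video kernels above) copied into the stream verbatim.
 */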
 
static const struct blendinfo {
    bool src_alpha;
    uint32_t src_blend;
    uint32_t dst_blend;
} gen4_blend_op[] = {
    /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
    /* Src */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
    /* Dst */   {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
    /* Over */  {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
    /* In */    {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Out */   {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Atop */  {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Xor */   {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Add */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
    (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
    ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
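/* Both macros yield byte offsets into the static state built by
 * gen4_render_setup(): every cc unit state is written at a 64-byte stride
 * (see gen4_create_cc_unit_state), and the wm unit states are padded to
 * 64 bytes and laid out as a [sf][se][mf][me][kernel] array in that order.
 */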
 
static void
gen4_emit_pipelined_pointers(struct sna *sna,
                 const struct sna_composite_op *op,
                 int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
    return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}
 
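/* Map a Render op onto one of the wm kernels above; e.g. PictOpOver with a
 * component-alpha mask selects WM_KERNEL_MASKSA, since Over's blend consumes
 * the source alpha (see gen4_blend_op), and "+ !is_affine" moves to the
 * projective variant.
 */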
static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
    int base;

    if (has_mask) {
        if (is_ca) {
            if (gen4_blend_op[op].src_alpha)
                base = WM_KERNEL_MASKSA;
            else
                base = WM_KERNEL_MASKCA;
        } else
            base = WM_KERNEL_MASK;
    } else
        base = WM_KERNEL;

    return base + !is_affine;
}
 
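/* Component alpha with an op whose blend consumes the source alpha cannot be
 * done in a single gen4 blend pass. Once the first pass has applied the
 * per-channel mask to the destination, this fixup replays exactly the same
 * vertices (vertex_start..vertex_index) with a PictOpAdd blend and the CA
 * kernel to accumulate the masked source values.
 */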
static bool gen4_magic_ca_pass(struct sna *sna,
                   const struct sna_composite_op *op)
{
    struct gen4_render_state *state = &sna->render_state.gen4;

    if (!op->need_magic_ca_pass)
        return false;

    assert(sna->render.vertex_index > sna->render.vertex_start);

    DBG(("%s: CA fixup\n", __FUNCTION__));
    assert(op->mask.bo != NULL);
    assert(op->has_component_alpha);

    gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
                     gen4_choose_composite_kernel(PictOpAdd,
                                  true, true, op->is_affine));

    OUT_BATCH(GEN4_3DPRIMITIVE |
          GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
          (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
          (0 << 9) |
          4);
    OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
    OUT_BATCH(sna->render.vertex_start);
    OUT_BATCH(1);   /* single instance */
    OUT_BATCH(0);   /* start instance location */
    OUT_BATCH(0);   /* index buffer offset, ignored */

    state->last_primitive = sna->kgem.nbatch;
    return true;
}
 
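/* Note: this port hardwires ONE/INV_SRC_ALPHA, i.e. premultiplied src-over,
 * for every op; the stock logic deriving the factors from gen4_blend_op[]
 * and the destination format is compiled out below.
 */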
static uint32_t gen4_get_blend(int op,
                   bool has_component_alpha,
                   uint32_t dst_format)
{
    uint32_t src, dst;

    src = GEN4_BLENDFACTOR_ONE;  //gen4_blend_op[op].src_blend;
    dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
#if 0
    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
     * it as always 1.
     */
    if (PICT_FORMAT_A(dst_format) == 0) {
        if (src == GEN4_BLENDFACTOR_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ONE;
        else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ZERO;
    }

    /* If the source alpha is being used, then we should only be in a
     * case where the source blend factor is 0, and the source blend
     * value is the mask channels multiplied by the source picture's alpha.
     */
    if (has_component_alpha && gen4_blend_op[op].src_alpha) {
        if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_SRC_COLOR;
        else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
    }
#endif
    DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
         op, dst_format, PICT_FORMAT_A(dst_format),
         src, dst, BLEND_OFFSET(src, dst)));
    return BLEND_OFFSET(src, dst);
}
 
static uint32_t gen4_get_card_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
    }
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
    }
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;
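/* SURFACE_STATE proper is six dwords; padding each entry to 32 bytes keeps
 * the hardware-required alignment and makes the surface bookkeeping in
 * gen4_bind_bo()/gen4_composite_get_binding_table() a fixed stride.
 */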
 
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}
 
/**
 * Sets up the common fields of a surface state entry for the given bo,
 * writing it into the batch's surface area.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst)
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
 
static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
 
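/* Open a new RECTLIST primitive, or, when nothing has been emitted since the
 * previous 3DPRIMITIVE, rewind vertex_offset to its vertex-count dword
 * (nbatch - 5) so the following rectangles extend the still-open primitive.
 */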
static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
 
static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7xpipelined pointers + 6xprimitive + 1xflush */
	ndwords = op->need_magic_ca_pass? 20 : 6;
	if ((sna->render.vb_id & (1 << id)) == 0)
		ndwords += 5;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & (1 << id)) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (op->need_magic_ca_pass && sna->render.vbo)
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}
 
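/* Reserve vbo space for up to 'want' rectangles, flushing and resubmitting
 * as required; returns how many fit (each RECTLIST rectangle is three
 * vertices, hence vertex_index advances by 3 per rectangle).
 */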
inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
 
static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_start, urb_vs_size;
	int urb_gs_start, urb_gs_size;
	int urb_clip_start, urb_clip_size;
	int urb_sf_start, urb_sf_size;
	int urb_cs_start, urb_cs_size;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_start = 0;
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_start = urb_vs_start + urb_vs_size;
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_clip_start = urb_gs_start + urb_gs_size;
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
	urb_sf_start = urb_clip_start + urb_clip_size;
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_start = urb_sf_start + urb_sf_size;
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
 
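	/* The URB_FENCE packet must not cross a 64-byte cacheline boundary,
	 * so pad with NOOPs until its three dwords fit within the current
	 * cacheline.
	 */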
	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}
 
static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}
 
static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}
 
static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}
 
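/* The sampler/wm offset and blend offset are folded into a single key and
 * cached in last_pipelined_pointers so redundant 3DSTATE_PIPELINED_POINTERS
 * packets are skipped; emitting one is also the point at which the one-shot
 * URB fence is written.
 */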
static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}
 
static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
 
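/* ve_id encodes the vertex layout: bits 1:0 give the source texcoord storage
 * (0: two sscaled shorts, 1: one float, 2: two floats, 3: three floats) and
 * bits 2+ the mask texcoord float count, zero meaning no mask channel.
 */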
static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}
 
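/* Bit 0 of wm_binding_table smuggles the "destination was already dirty"
 * flag from gen4_bind_surfaces() (offset | dirty); it is masked off before
 * the real binding-table offset is emitted.
 */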
static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	flush = wm_binding_table & 1;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		OUT_BATCH(MI_FLUSH);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	gen4_emit_binding_table(sna, wm_binding_table & ~1);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}
 
static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen4_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	gen4_emit_state(sna, op, offset | dirty);
}
 
fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}
 
static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
}
 
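/* Entry point used by the KolibriOS blitter: composites src (x8r8g8b8)
 * through an a8 mask with PictOpSrc; 'scale' chooses whether the source
 * texcoords are normalised to the blit size or to the source size.
 */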
static bool
gen4_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
//	tmp->src.offset[0] = -dst_x;
//	tmp->src.offset[1] = -dst_y;

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;
//	tmp->mask.offset[0] = -dst_x;
//	tmp->mask.offset[1] = -dst_y;

	tmp->u.gen4.wm_kernel =
		gen4_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
	}

	gen4_bind_surfaces(sna, tmp);
	gen4_align_vertex(sna, tmp);
	return true;
}
 
static void
gen4_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
discard_vbo(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}

static void
gen4_render_retire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}

static void
gen4_render_expire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}
}

static void gen4_render_reset(struct sna *sna)
{
	sna->render_state.gen4.needs_invariant = true;
	sna->render_state.gen4.needs_urb = true;
	sna->render_state.gen4.ve_id = -1;
	sna->render_state.gen4.last_primitive = -1;
	sna->render_state.gen4.last_pipelined_pointers = -1;

	sna->render_state.gen4.drawrect_offset = -1;
	sna->render_state.gen4.drawrect_limit = -1;
	sna->render_state.gen4.surface_table = -1;

	if (sna->render.vbo &&
	    !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}
 
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return sna_static_stream_offsetof(stream, vs);
}
 
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
				     int gen, uint32_t kernel)
{
	struct gen4_sf_unit_state *sf;

	sf = sna_static_stream_map(stream, sizeof(*sf), 32);

	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer = kernel >> 6;
	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;
	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->sf5.viewport_transform = false;	/* skip viewport */
	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf7.trifan_pv = 2;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return sna_static_stream_offsetof(stream, sf);
}
 
static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *sampler_state;

	sampler_state = sna_static_stream_map(stream,
					      sizeof(struct gen4_sampler_state) * 2,
					      32);
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, sampler_state);
}
 
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;

	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;

	wm->thread3.urb_entry_read_offset = 0;
	wm->thread3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	if (has_mask) {
		wm->thread1.binding_table_entry_count = 3;
		wm->thread3.urb_entry_read_length = 4;
	} else {
		wm->thread1.binding_table_entry_count = 2;
		wm->thread3.urb_entry_read_length = 2;
	}
}
 
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *ptr, *base;
	int i, j;

	base = ptr =
		sna_static_stream_map(stream,
				      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				      64);

	for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
			struct gen4_cc_unit_state *state =
				(struct gen4_cc_unit_state *)ptr;

			state->cc3.blend_enable =
				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc;	/* COPY */
			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;

			/* Fill in alpha blend factors same as color, for the future. */
			state->cc5.ia_src_blend_factor = i;
			state->cc5.ia_dest_blend_factor = j;

			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = i;
			state->cc6.dest_blend_factor = j;

			ptr += 64;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
 
static bool gen4_render_setup(struct sna *sna)
{
	struct gen4_render_state *state = &sna->render_state.gen4;
	struct sna_static_stream general;
	struct gen4_wm_unit_state_padded *wm_state;
	uint32_t sf, wm[KERNEL_COUNT];
	int i, j, k, l, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		}
	}

	state->vs = gen4_create_vs_unit_state(&general);
	state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);

	wm_state = sna_static_stream_map(&general,
					  sizeof(*wm_state) * KERNEL_COUNT *
					  FILTER_COUNT * EXTEND_COUNT *
					  FILTER_COUNT * EXTEND_COUNT,
					  64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					uint32_t sampler_state;

					sampler_state =
						gen4_create_sampler_state(&general,
									  i, j,
									  k, l);

					for (m = 0; m < KERNEL_COUNT; m++) {
						gen4_init_wm_state(&wm_state->state,
								   sna->kgem.gen,
								   wm_kernels[m].has_mask,
								   wm[m], sampler_state);
						wm_state++;
					}
				}
			}
		}
	}

	state->cc = gen4_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
 
bool gen4_render_init(struct sna *sna)
{
	if (!gen4_render_setup(sna))
		return false;

	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

	sna->render.prefer_gpu |= PREFER_GPU_RENDER;

	sna->render.blit_tex = gen4_blit_tex;

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	return true;
}