/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu
 *    Eric Anholt
 *    Carl Worth
 *    Keith Packard
 *    Chris Wilson
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_common.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders that we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define MAX_FLUSH_VERTICES 6
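/* Despite the name, this is counted in rectangles (three vertices each):
 * with FORCE_FLUSH enabled, gen4_get_rectangles() inserts an MI_FLUSH at
 * least every MAX_FLUSH_VERTICES rectangles.
 */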

#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES        0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES        32

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES        0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES        64

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
    [kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
    const void *data;
    unsigned int size;
    bool has_mask;
} wm_kernels[] = {
    NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
    NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

    NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
    NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

    NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
    NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

    NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
    NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

    NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
    NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

    KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
    KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL

static const struct blendinfo {
    bool src_alpha;
    uint32_t src_blend;
    uint32_t dst_blend;
} gen4_blend_op[] = {
    /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
    /* Src */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
    /* Dst */   {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
    /* Over */  {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
    /* In */    {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Out */   {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
    /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Atop */  {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
    /* Xor */   {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
    /* Add */   {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
    (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
    ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
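/* Blend (CC) and sampler state for every combination is pre-baked into the
 * static state stream when the renderer is initialised (not shown in this
 * file excerpt); these macros compute the byte offset of one combination
 * within those tables, each entry occupying a 64-byte slot.
 */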

static void
gen4_emit_pipelined_pointers(struct sna *sna,
                 const struct sna_composite_op *op,
                 int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
    return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}
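
/* Pick the WM kernel for a composite op: masked kernels come in
 * component-alpha (CA) and source-alpha (SA) flavours, and every kernel
 * has an affine and a projective variant in adjacent enum slots, hence
 * the "base + !is_affine" at the end.
 */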

static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
    int base;

    if (has_mask) {
        if (is_ca) {
            if (gen4_blend_op[op].src_alpha)
                base = WM_KERNEL_MASKSA;
            else
                base = WM_KERNEL_MASKCA;
        } else
            base = WM_KERNEL_MASK;
    } else
        base = WM_KERNEL;

    return base + !is_affine;
}
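
/* Component-alpha fixup: replay exactly the rectangles of the primary pass
 * (vertex_start..vertex_index are still in the vbo) with a PictOpAdd
 * kernel, so the source contribution is accumulated on top of the first
 * pass without re-emitting any vertex data.
 */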

static bool gen4_magic_ca_pass(struct sna *sna,
                   const struct sna_composite_op *op)
{
    struct gen4_render_state *state = &sna->render_state.gen4;

    if (!op->need_magic_ca_pass)
        return false;

    assert(sna->render.vertex_index > sna->render.vertex_start);

    DBG(("%s: CA fixup\n", __FUNCTION__));
    assert(op->mask.bo != NULL);
    assert(op->has_component_alpha);

    gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
                     gen4_choose_composite_kernel(PictOpAdd,
                                  true, true, op->is_affine));

    OUT_BATCH(GEN4_3DPRIMITIVE |
          GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
          (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
          (0 << 9) |
          4);
    OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
    OUT_BATCH(sna->render.vertex_start);
    OUT_BATCH(1);   /* single instance */
    OUT_BATCH(0);   /* start instance location */
    OUT_BATCH(0);   /* index buffer offset, ignored */

    state->last_primitive = sna->kgem.nbatch;
    return true;
}

static uint32_t gen4_get_blend(int op,
                   bool has_component_alpha,
                   uint32_t dst_format)
{
    uint32_t src, dst;

    src = GEN4_BLENDFACTOR_ONE;  //gen4_blend_op[op].src_blend;
    dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
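    /* NB: this port hard-codes the PictOpOver blend factors
     * (ONE, INV_SRC_ALPHA); the generic per-op adjustment below
     * is compiled out.
     */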
#if 0
    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
     * it as always 1.
     */
    if (PICT_FORMAT_A(dst_format) == 0) {
        if (src == GEN4_BLENDFACTOR_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ONE;
        else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
            src = GEN4_BLENDFACTOR_ZERO;
    }

    /* If the source alpha is being used, then we should only be in a
     * case where the source blend factor is 0, and the source blend
     * value is the mask channels multiplied by the source picture's alpha.
     */
    if (has_component_alpha && gen4_blend_op[op].src_alpha) {
        if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_SRC_COLOR;
        else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
            dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
    }
#endif
    DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
         op, dst_format, PICT_FORMAT_A(dst_format),
         src, dst, BLEND_OFFSET(src, dst)));
    return BLEND_OFFSET(src, dst);
}

static uint32_t gen4_get_card_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
    }
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
    switch (format) {
    default:
        return -1;
    case PICT_a8r8g8b8:
    case PICT_x8r8g8b8:
        return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
    case PICT_a8:
        return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
    }
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
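
/* If the previous command in the batch was also our 3DPRIMITIVE, extend it
 * in place instead of emitting a new one: point vertex_offset back at its
 * (still unfilled) vertex-count dword.
 */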

static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}

static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	unsigned int id = 1 << op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7xpipelined pointers + 6xprimitive + 1xflush */
	ndwords = op->need_magic_ca_pass? 20 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	ndwords += 2*FORCE_FLUSH;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem,
			      2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);
#if FORCE_FLUSH
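	/* Workaround for the gen4 shader flush bug (see FORCE_FLUSH above):
	 * compute how many rectangles may still be emitted into the current
	 * primitive before an MI_FLUSH is due, and clamp this request so we
	 * never exceed MAX_FLUSH_VERTICES rectangles between flushes.
	 */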
	rem = sna->render.vertex_offset;
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
		rem = sna->kgem.nbatch - 5;
	if (rem) {
		rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
		if (rem <= 0) {
			if (sna->render.vertex_offset) {
				gen4_vertex_flush(sna);
				if (gen4_magic_ca_pass(sna, op))
					gen4_emit_pipelined_pointers(sna, op, op->op,
								     op->u.gen4.wm_kernel);
			}
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
			rem = MAX_FLUSH_VERTICES;
		}
	} else
		rem = MAX_FLUSH_VERTICES;
	if (want > rem)
		want = rem;
#endif

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}
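
/* Fence off the URB according to the static partitioning declared at the
 * top of the file.  The URB_FENCE command must not cross a 64-byte
 * cacheline boundary, hence the MI_NOOP padding until the command fits
 * within the current cacheline.
 */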

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_start, urb_vs_size;
	int urb_gs_start, urb_gs_size;
	int urb_clip_start, urb_clip_size;
	int urb_sf_start, urb_sf_size;
	int urb_cs_start, urb_cs_size;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_start = 0;
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_start = urb_vs_start + urb_vs_size;
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_clip_start = urb_gs_start + urb_gs_size;
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
	urb_sf_start = urb_clip_start + urb_clip_size;
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_start = urb_sf_start + urb_sf_size;
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}

static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}

static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}

static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}
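
/* The sampler and blend offsets uniquely identify the fixed-function
 * setup; pack them into a single key so that an unchanged combination
 * skips re-emitting 3DSTATE_PIPELINED_POINTERS altogether.
 */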

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}

static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}

static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	assert(op->dst.bo->exec);

	flush = wm_binding_table & 1;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		OUT_BATCH(MI_FLUSH);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	gen4_emit_binding_table(sna, wm_binding_table & ~1);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}
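
/* Emit surface state for dst/src/mask and point the binding table at it.
 * If the freshly written table is identical to the previous one, roll the
 * surface allocation back and reuse the old binding-table offset.
 */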

static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen4_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	gen4_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}

#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen4_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen4_emit_state(sna, op, offset | dirty);
}

static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	BoxPtr box;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel =
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;
	}

	gen4_align_vertex(sna, &tmp);
	gen4_video_bind_surfaces(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
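
	/* Texture coordinates are normalised over the whole frame, so the
	 * scale maps destination pixels to source texels and the offset
	 * accounts for the source origin within the frame.
	 */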

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			BoxRec r;

			r.x1 = box->x1 + pix_xoff;
			r.x2 = box->x2 + pix_xoff;
			r.y1 = box->y1 + pix_yoff;
			r.y2 = box->y2 + pix_yoff;

			OUT_VERTEX(r.x2, r.y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
				sna_damage_add_box(&priv->gpu_damage, &r);
				sna_damage_subtract_box(&priv->cpu_damage, &r);
			}
			box++;
		} while (--n);
	} while (nbox);
	gen4_vertex_flush(sna);

	return true;
}
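
/* Convert a Picture into a composite channel: solid colours and linear
 * gradients are synthesised directly, integer-translation transforms are
 * folded into the coordinates, and anything the sampler cannot address
 * directly is fixed up, converted or extracted into a smaller bo.
 */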

static int
gen4_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_repeat(picture)) {
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_filter(picture)) {
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen4_get_card_format(picture->format);
	if (channel->card_format == -1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
	     __FUNCTION__,
	     channel->repeat, gen4_repeat(channel->repeat),
	     channel->filter, gen4_repeat(channel->filter)));
	channel->repeat = gen4_repeat(channel->repeat);
	channel->filter = gen4_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen4_get_card_format(channel->pict_format);
}
#endif

static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}

}

#if 0
static bool
gen4_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	op->dst.format = dst->format;
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}

static bool
check_gradient(PicturePtr picture, bool precise)
{
	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

static bool
need_upload(struct sna *sna, PicturePtr p)
{
	return p->pDrawable && untransformed(p) &&
		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}

static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
		return true;

	/* soft errors: prefer to upload/compute rather than readback */
	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}

static bool
gen4_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen4_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
1674
	if (src_fallback) {
1675
		DBG(("%s: dst is on the CPU and src will fallback\n",
1676
		     __FUNCTION__));
1677
		return true;
1678
	}
1679
 
1680
	if (mask_fallback) {
1681
		DBG(("%s: dst is on the CPU and mask will fallback\n",
1682
		     __FUNCTION__));
1683
		return true;
1684
	}
1685
 
1686
	if (too_large(dst_pixmap->drawable.width,
1687
		      dst_pixmap->drawable.height) &&
1688
	    dst_is_cpu(dst_pixmap)) {
1689
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1690
		return true;
1691
	}
1692
 
1693
	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1694
	     __FUNCTION__));
1695
	return dst_use_cpu(dst_pixmap);
1696
}
1697
 
1698
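/* If the mask samples the same drawable with the same transform and alpha
 * map as the source, clone the already-prepared source channel (taking a
 * fresh bo reference) instead of setting the mask channel up from scratch. */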
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen4_check_repeat(mask))
		return false;

	if (!gen4_check_filter(mask))
		return false;

	if (!gen4_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen4_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen4_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

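/* Top-level composite entry point: try the BLT engine first for unmasked
 * operations, bail to software when gen4_composite_fallback() says so,
 * tile oversized operations, and otherwise prepare the source and mask
 * channels before choosing a WM kernel and vertex emitter. */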
static bool
gen4_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	if (op >= ARRAY_SIZE(gen4_blend_op))
		return false;

	if (mask == NULL &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen4_composite_fallback(sna, src, mask, dst))
		return false;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (!gen4_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       op > PictOpSrc || dst->pCompositeClip->data)) {
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
		return false;
	}

	tmp->op = op;
	switch (gen4_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: failed to prepare source\n", __FUNCTION__));
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		if (mask == NULL &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen4_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen4_blend_op[op].src_alpha &&
			    (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s -- fallback: unhandled component alpha blend\n",
					     __FUNCTION__));

					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen4_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: failed to prepare mask\n", __FUNCTION__));
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen4_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

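	/* Pick the WM kernel variant matching this operation (masked or
	 * not, component alpha or not, affine or projective), then the
	 * vertex emitter that produces the corresponding vertex layout. */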
	tmp->u.gen4.wm_kernel =
		gen4_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->box   = gen4_render_composite_box;
	tmp->boxes = gen4_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
		tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
	}
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				     tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				     NULL))
			goto cleanup_mask;
	}

	gen4_align_vertex(sna, tmp);
	gen4_bind_surfaces(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}

#endif

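/* Invalidate all cached hardware state after a batch submission: the -1
 * sentinels (and the needs_* flags) force the unit states, drawing
 * rectangle, surface table and vertex-buffer binding to be re-emitted
 * into the next batch. */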
static void gen4_render_reset(struct sna *sna)
{
	sna->render_state.gen4.needs_invariant = true;
	sna->render_state.gen4.needs_urb = true;
	sna->render_state.gen4.ve_id = -1;
	sna->render_state.gen4.last_primitive = -1;
	sna->render_state.gen4.last_pipelined_pointers = -1;

	sna->render_state.gen4.drawrect_offset = -1;
	sna->render_state.gen4.drawrect_limit = -1;
	sna->render_state.gen4.surface_table = -1;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}

static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return sna_static_stream_offsetof(stream, vs);
}

static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
				     uint32_t kernel)
{
	struct gen4_sf_unit_state *sf;

	sf = sna_static_stream_map(stream, sizeof(*sf), 32);

	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer = kernel >> 6;
	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;
	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->sf5.viewport_transform = false;	/* skip viewport */
	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf7.trifan_pv = 2;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return sna_static_stream_offsetof(stream, sf);
}

static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *sampler_state;

	sampler_state = sna_static_stream_map(stream,
					      sizeof(struct gen4_sampler_state) * 2,
					      32);
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, sampler_state);
}

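/* Each sampler block emitted above holds a pair of states: slot 0 samples
 * the source channel and slot 1 the mask channel. */
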
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;

	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;

	wm->thread3.urb_entry_read_offset = 0;
	wm->thread3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	if (has_mask) {
		wm->thread1.binding_table_entry_count = 3;
		wm->thread3.urb_entry_read_length = 4;
	} else {
		wm->thread1.binding_table_entry_count = 2;
		wm->thread3.urb_entry_read_length = 2;
	}
}

static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *ptr, *base;
	int i, j;

	base = ptr =
		sna_static_stream_map(stream,
				      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				      64);

	for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
			struct gen4_cc_unit_state *state =
				(struct gen4_cc_unit_state *)ptr;

			state->cc3.blend_enable =
				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc;	/* COPY */
			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;

			/* Fill in alpha blend factors same as color, for the future. */
			state->cc5.ia_src_blend_factor = i;
			state->cc5.ia_dest_blend_factor = j;

			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = i;
			state->cc6.dest_blend_factor = j;

			ptr += 64;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

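/* The loop above packs one 64-byte CC state per (src, dst) blend-factor
 * pair, source factor major.  A lookup for a given pair would therefore
 * take the form (a sketch; the actual consumer lives elsewhere, with
 * cc_base being the offset returned by gen4_create_cc_unit_state()):
 *
 *	offset = cc_base + (src_factor * GEN4_BLENDFACTOR_COUNT +
 *			    dst_factor) * 64;
 */
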
static bool gen4_render_setup(struct sna *sna)
{
	struct gen4_render_state *state = &sna->render_state.gen4;
	struct sna_static_stream general;
	struct gen4_wm_unit_state_padded *wm_state;
	uint32_t sf, wm[KERNEL_COUNT];
	int i, j, k, l, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		}
	}

	state->vs = gen4_create_vs_unit_state(&general);
	state->sf = gen4_create_sf_state(&general, sf);

	wm_state = sna_static_stream_map(&general,
					  sizeof(*wm_state) * KERNEL_COUNT *
					  FILTER_COUNT * EXTEND_COUNT *
					  FILTER_COUNT * EXTEND_COUNT,
					  64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					uint32_t sampler_state;

					sampler_state =
						gen4_create_sampler_state(&general,
									  i, j,
									  k, l);

					for (m = 0; m < KERNEL_COUNT; m++) {
						gen4_init_wm_state(&wm_state->state,
								   sna->kgem.gen,
								   wm_kernels[m].has_mask,
								   wm[m], sampler_state);
						wm_state++;
					}
				}
			}
		}
	}

	state->cc = gen4_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

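/* The WM unit-state table built by gen4_render_setup() uses the kernel as
 * the minor dimension.  A lookup would take the form (a sketch, assuming
 * the same loop order as above; the real consumer lives elsewhere):
 *
 *	n = (((src_filter * EXTEND_COUNT + src_extend) * FILTER_COUNT
 *	      + mask_filter) * EXTEND_COUNT + mask_extend) * KERNEL_COUNT
 *	    + kernel;
 *	offset = state->wm + n * sizeof(struct gen4_wm_unit_state_padded);
 */
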
const char *gen4_render_init(struct sna *sna, const char *backend)
{
	if (!gen4_render_setup(sna))
		return backend;

	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if 0
#if !NO_COMPOSITE
	sna->render.composite = gen4_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen4_check_composite_spans;
	sna->render.composite_spans = gen4_render_composite_spans;
	if (0)
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif

#if !NO_VIDEO
	sna->render.video = gen4_render_video;
#endif

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen4_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen4_render_fill_one;
#endif

#endif

	sna->render.blit_tex = gen4_blit_tex;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}

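/* Textured-blit entry point used in place of the full composite paths
 * (which are compiled out above): always a PictOpSrc copy of an x8r8g8b8
 * source modulated by an a8 mask onto an a8r8g8b8 destination, sampled
 * unfiltered with no repeat. */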
static bool
gen4_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_a8r8g8b8;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Normalise the source texture coordinates, either to the blit
	 * size or to the full source extents. */
	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
//	tmp->src.offset[0] = -dst_x;
//	tmp->src.offset[1] = -dst_y;

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;
//	tmp->mask.offset[0] = -dst_x;
//	tmp->mask.offset[1] = -dst_y;

	tmp->u.gen4.wm_kernel = WM_KERNEL_MASK;
//	gen4_choose_composite_kernel(tmp->op,
//				     tmp->mask.bo != NULL,
//				     tmp->has_component_alpha,
//				     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
	}

	gen4_align_vertex(sna, tmp);
	gen4_bind_surfaces(sna, tmp);
	return true;
}