/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu
 *    Eric Anholt
 *    Carl Worth
 *    Keith Packard
 *    Chris Wilson
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders: we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define MAX_FLUSH_VERTICES 6

#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
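
/* GEN4_GRF_BLOCKS() rounds a GRF register count up to whole 16-register
 * blocks and encodes it as "blocks minus one". Worked examples of the
 * arithmetic (nothing here beyond the macro itself):
 *   GEN4_GRF_BLOCKS(16) == (16 + 15) / 16 - 1 == 0
 *   GEN4_GRF_BLOCKS(32) == (32 + 15) / 16 - 1 == 1
 */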

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES        0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES        32

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES        0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES        64
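
/* A quick sketch of how gen4_emit_urb() below turns this partitioning into
 * URB_FENCE values: each section's fence is the running sum of
 * ENTRIES * ENTRY_SIZE, so with the numbers above the regions end at
 *   VS:     0 + 32*1 =  32
 *   GS:    32 +  0*0 =  32
 *   CLIP:  32 +  0*0 =  32
 *   SF:    32 + 64*2 = 160
 *   CS:   160 +  0*1 = 160
 * i.e. only the VS and SF sections receive any URB space in this scheme.
 */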

/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
	[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
	[kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
	const void *data;
	unsigned int size;
	bool has_mask;
} wm_kernels[] = {
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

	KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL

static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen4_blend_op[] = {
	/* Clear */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
	/* Over */	{1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
	/* In */	{0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
	(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
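
/* Both macros index pre-baked, 64-byte-aligned state blocks: BLEND_OFFSET()
 * treats the (src, dst) blend-factor pair as a row/column index into a flat
 * array of cc (blend) state, and SAMPLER_OFFSET() does the same for the
 * five-dimensional (src filter, src extend, mask filter, mask extend,
 * kernel) space of wm state. One property worth noting, derivable from the
 * macro alone: adjacent kernels for identical sampler settings are exactly
 * 64 bytes apart, i.e.
 *   SAMPLER_OFFSET(sf, se, mf, me, k + 1) -
 *   SAMPLER_OFFSET(sf, se, mf, me, k) == 64
 */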

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
	return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}

static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen4_blend_op[op].src_alpha)
				base = WM_KERNEL_MASKSA;
			else
				base = WM_KERNEL_MASKCA;
		} else
			base = WM_KERNEL_MASK;
	} else
		base = WM_KERNEL;

	return base + !is_affine;
}
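
/* Kernel selection sketch: for an affine Over with a component-alpha mask,
 * gen4_blend_op[PictOpOver].src_alpha is set, so this picks WM_KERNEL_MASKSA;
 * the projective variant is base + 1, which relies on each WM_KERNEL_*_P
 * enum directly following its affine counterpart (the same ordering the
 * wm_kernels[] table above assumes).
 */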

static bool gen4_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen4_render_state *state = &sna->render_state.gen4;

	if (!op->need_magic_ca_pass)
		return false;

	assert(sna->render.vertex_index > sna->render.vertex_start);

	DBG(("%s: CA fixup\n", __FUNCTION__));
	assert(op->mask.bo != NULL);
	assert(op->has_component_alpha);

	gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
				     gen4_choose_composite_kernel(PictOpAdd,
								  true, true, op->is_affine));

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static uint32_t gen4_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = GEN4_BLENDFACTOR_ONE;  //gen4_blend_op[op].src_blend;
	dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen4_blend_op[op].dst_blend;
#if 0
	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
	 * it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN4_BLENDFACTOR_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ONE;
		else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen4_blend_op[op].src_alpha) {
		if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
	}
#endif
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}
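
/* Note on the hard-coded factors above: (ONE, INV_SRC_ALPHA) is the blend
 * pair for premultiplied PictOpOver, so with the generic lookup disabled
 * under #if 0 this port effectively blends every composite as Over. A
 * hypothetical re-enabling would restore the table lookup, e.g.:
 *   src = gen4_blend_op[op].src_blend;
 *   dst = gen4_blend_op[op].dst_blend;
 * together with the dst-alpha and component-alpha fixups shown above.
 */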

static uint32_t gen4_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields of a surface state buffer for the given
 * picture.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
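
/* Binding-cache sketch: the key passed to kgem_bo_get_binding() is
 * format | is_dst << 31, so the same bo bound both as a sampler source and
 * as the render target gets two distinct cached surface-state entries, and
 * a repeat bind is satisfied without writing a new gen4_surface_state_padded
 * block (only re-marking the bo dirty when it is the destination).
 */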

static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
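
/* A note on the fast path above: last_primitive records nbatch just after a
 * 3DPRIMITIVE was emitted. If nothing else has been written to the batch
 * since, the previous packet can simply be extended instead of starting a
 * new one; nbatch - 5 points back at its vertex-count dword (the five body
 * dwords being count, start index, instance count, start instance and index
 * offset), which gen4_vertex_flush() fills in later.
 */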

static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	unsigned int id = 1 << op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7x pipelined pointers + 6x primitive + 1x flush */
	ndwords = op->need_magic_ca_pass ? 20 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	ndwords += 2*FORCE_FLUSH;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem,
			      2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (op->need_magic_ca_pass && sna->render.vbo)
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);
#if FORCE_FLUSH
	rem = sna->render.vertex_offset;
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
		rem = sna->kgem.nbatch - 5;
	if (rem) {
		rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
		if (rem <= 0) {
			if (sna->render.vertex_offset) {
				gen4_vertex_flush(sna);
				if (gen4_magic_ca_pass(sna, op))
					gen4_emit_pipelined_pointers(sna, op, op->op,
								     op->u.gen4.wm_kernel);
			}
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
			rem = MAX_FLUSH_VERTICES;
		}
	} else
		rem = MAX_FLUSH_VERTICES;
	if (want > rem)
		want = rem;
#endif

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
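
/* FORCE_FLUSH arithmetic, worked through: vertex_index - vertex_start is
 * the number of vertices in the open RECTLIST, and each rectangle costs
 * three vertices, so (index - start) / 3 is the rectangle count. With
 * MAX_FLUSH_VERTICES == 6 the workaround therefore inserts an MI_FLUSH
 * after at most six rectangles (despite the name, the limit here is
 * counted in rectangles, not vertices), i.e. every 18 emitted vertices.
 */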

static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_start, urb_vs_size;
	int urb_gs_start, urb_gs_size;
	int urb_clip_start, urb_clip_size;
	int urb_sf_start, urb_sf_size;
	int urb_cs_start, urb_cs_size;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_start = 0;
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_start = urb_vs_start + urb_vs_size;
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_clip_start = urb_gs_start + urb_gs_size;
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
	urb_sf_start = urb_clip_start + urb_clip_size;
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_start = urb_sf_start + urb_sf_size;
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}

static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}

static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}
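
/* Realignment example: when the vertex size changes, vertex_index is
 * rounded up so that the next vertex starts on a whole-vertex boundary in
 * the new layout. With vertex_used == 10 floats and floats_per_vertex == 3:
 *   vertex_index = (10 + 3 - 1) / 3 = 4
 *   vertex_used  = 4 * 3           = 12
 * leaving a two-float hole rather than splitting a vertex across the seam.
 */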

static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}

static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}
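
/* Layout example for the video path (a sketch based on how this file sets
 * ve_id): gen4_render_video() uses ve_id == 2 and floats_per_vertex == 3,
 * i.e. per vertex one dword of packed 16-bit (x, y) via OUT_VERTEX() plus
 * two floats (u, v) selected by the "id & 3 == 2" case above; and since
 * id >> 2 == 0, the second texture channel is filled with constants.
 */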

static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	assert(op->dst.bo->exec);

	flush = wm_binding_table & 1;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		OUT_BATCH(MI_FLUSH);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	gen4_emit_binding_table(sna, wm_binding_table & ~1);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}

static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	gen4_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}

#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen4_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen4_emit_state(sna, op, offset | dirty);
}
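
/* Planar-source sketch: for a planar fourcc the frame is bound as six
 * R8_UNORM surfaces, Y at full resolution (slots 0-1), then V and U at
 * half width/height (slots 2-3 and 4-5), with each plane appearing twice,
 * presumably so the planar WM kernel can read every channel from a fixed
 * sampler index. Packed YUY2/UYVY instead binds a single YCRCB surface.
 */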

static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	BoxPtr box;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel =
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
	}

	gen4_video_bind_surfaces(sna, &tmp);
	gen4_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			BoxRec r;

			r.x1 = box->x1 + pix_xoff;
			r.x2 = box->x2 + pix_xoff;
			r.y1 = box->y1 + pix_yoff;
			r.y2 = box->y2 + pix_yoff;

			OUT_VERTEX(r.x2, r.y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
				sna_damage_add_box(&priv->gpu_damage, &r);
				sna_damage_subtract_box(&priv->cpu_damage, &r);
			}
			box++;
		} while (--n);
	} while (nbox);
	gen4_vertex_flush(sna);

	return true;
}
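
/* Texture-coordinate check for the mapping above: u(x) = x * src_scale_x +
 * src_offset_x with src_scale_x = (src_width / dst_width) / frame->width.
 * At the left edge, x == dstRegion->extents.x1, the scale term cancels and
 * u == frame->src.x1 / frame->width; at the right edge, x == extents.x2,
 * u == frame->src.x2 / frame->width. That is, the destination region spans
 * exactly the normalized source window.
 */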
1381
 
1382
static int
1383
gen4_composite_picture(struct sna *sna,
1384
		       PicturePtr picture,
1385
		       struct sna_composite_channel *channel,
1386
		       int x, int y,
1387
		       int w, int h,
1388
		       int dst_x, int dst_y,
1389
		       bool precise)
1390
{
1391
	PixmapPtr pixmap;
1392
	uint32_t color;
1393
	int16_t dx, dy;
1394
 
1395
	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1396
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
1397
 
1398
	channel->is_solid = false;
1399
	channel->card_format = -1;
1400
 
1401
	if (sna_picture_is_solid(picture, &color))
1402
		return gen4_channel_init_solid(sna, channel, color);
1403
 
1404
	if (picture->pDrawable == NULL) {
1405
		int ret;
1406
 
1407
		if (picture->pSourcePict->type == SourcePictTypeLinear)
1408
			return gen4_channel_init_linear(sna, picture, channel,
1409
							x, y,
1410
							w, h,
1411
							dst_x, dst_y);
1412
 
1413
		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
1414
		ret = -1;
1415
		if (!precise)
1416
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1417
								      x, y, w, h, dst_x, dst_y);
1418
		if (ret == -1)
1419
			ret = sna_render_picture_fixup(sna, picture, channel,
1420
						       x, y, w, h, dst_x, dst_y);
1421
		return ret;
1422
	}
1423
 
1424
	if (picture->alphaMap) {
1425
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
1426
		return sna_render_picture_fixup(sna, picture, channel,
1427
						x, y, w, h, dst_x, dst_y);
1428
	}
1429
 
1430
	if (!gen4_check_repeat(picture)) {
1431
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
1432
		return sna_render_picture_fixup(sna, picture, channel,
1433
						x, y, w, h, dst_x, dst_y);
1434
	}
1435
 
1436
	if (!gen4_check_filter(picture)) {
1437
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
1438
		return sna_render_picture_fixup(sna, picture, channel,
1439
						x, y, w, h, dst_x, dst_y);
1440
	}
1441
 
1442
	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1443
	channel->filter = picture->filter;
1444
 
1445
	pixmap = get_drawable_pixmap(picture->pDrawable);
1446
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
1447
 
1448
	x += dx + picture->pDrawable->x;
1449
	y += dy + picture->pDrawable->y;
1450
 
1451
	channel->is_affine = sna_transform_is_affine(picture->transform);
1452
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
1453
		DBG(("%s: integer translation (%d, %d), removing\n",
1454
		     __FUNCTION__, dx, dy));
1455
		x += dx;
1456
		y += dy;
1457
		channel->transform = NULL;
1458
		channel->filter = PictFilterNearest;
1459
	} else
1460
		channel->transform = picture->transform;
1461
 
1462
	channel->pict_format = picture->format;
1463
	channel->card_format = gen4_get_card_format(picture->format);
1464
	if (channel->card_format == -1)
1465
		return sna_render_picture_convert(sna, picture, channel, pixmap,
1466
						  x, y, w, h, dst_x, dst_y,
1467
						  false);
1468
 
1469
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
1470
		return sna_render_picture_extract(sna, picture, channel,
1471
						  x, y, w, h, dst_x, dst_y);
1472
 
1473
	return sna_render_pixmap_bo(sna, channel, pixmap,
1474
				    x, y, w, h, dst_x, dst_y);
1475
}
1476
 
1477
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
1478
{
1479
	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
1480
	     __FUNCTION__,
1481
	     channel->repeat, gen4_repeat(channel->repeat),
1482
	     channel->filter, gen4_repeat(channel->filter)));
1483
	channel->repeat = gen4_repeat(channel->repeat);
1484
	channel->filter = gen4_filter(channel->filter);
1485
	if (channel->card_format == (unsigned)-1)
1486
		channel->card_format = gen4_get_card_format(channel->pict_format);
1487
}
1488
#endif
1489
 
3291 Serge 1490
static void
1491
gen4_render_composite_done(struct sna *sna,
1492
			   const struct sna_composite_op *op)
1493
{
1494
	DBG(("%s()\n", __FUNCTION__));
1495
 
1496
	if (sna->render.vertex_offset) {
1497
		gen4_vertex_flush(sna);
1498
		gen4_magic_ca_pass(sna, op);
1499
	}
1500
 
1501
}
1502
 
4251 Serge 1503
#if 0
1504
static bool
1505
gen4_composite_set_target(struct sna *sna,
1506
			  struct sna_composite_op *op,
1507
			  PicturePtr dst,
1508
			  int x, int y, int w, int h,
1509
			  bool partial)
1510
{
1511
	BoxRec box;
3291 Serge 1512
 
4251 Serge 1513
	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1514
	op->dst.width  = op->dst.pixmap->drawable.width;
1515
	op->dst.height = op->dst.pixmap->drawable.height;
1516
	op->dst.format = dst->format;
1517
	if (w && h) {
1518
		box.x1 = x;
1519
		box.y1 = y;
1520
		box.x2 = x + w;
1521
		box.y2 = y + h;
1522
	} else
1523
		sna_render_picture_extents(dst, &box);
3291 Serge 1524
 
4251 Serge 1525
	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
1526
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
1527
					  &box, &op->damage);
1528
	if (op->dst.bo == NULL)
1529
		return false;
3291 Serge 1530
 
4251 Serge 1531
	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1532
			    &op->dst.x, &op->dst.y);
3291 Serge 1533
 
4251 Serge 1534
	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1535
	     __FUNCTION__,
1536
	     op->dst.pixmap, (int)op->dst.format,
1537
	     op->dst.width, op->dst.height,
1538
	     op->dst.bo->pitch,
1539
	     op->dst.x, op->dst.y,
1540
	     op->damage ? *op->damage : (void *)-1));
3291 Serge 1541
 
4251 Serge 1542
	assert(op->dst.bo->proxy == NULL);
3291 Serge 1543
 
4251 Serge 1544
	if (too_large(op->dst.width, op->dst.height) &&
1545
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1546
		return false;
3291 Serge 1547
 
4251 Serge 1548
	return true;
1549
}
3291 Serge 1550
 
4251 Serge 1551
static bool
1552
try_blt(struct sna *sna,
1553
	PicturePtr dst, PicturePtr src,
1554
	int width, int height)
1555
{
1556
	if (sna->kgem.mode != KGEM_RENDER) {
1557
		DBG(("%s: already performing BLT\n", __FUNCTION__));
1558
		return true;
1559
	}
3291 Serge 1560
 
4251 Serge 1561
	if (too_large(width, height)) {
1562
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
1563
		     __FUNCTION__, width, height));
1564
		return true;
1565
	}
3291 Serge 1566
 
4251 Serge 1567
	if (too_large(dst->pDrawable->width, dst->pDrawable->height))
1568
		return true;
3291 Serge 1569
 
4251 Serge 1570
	/* The blitter is much faster for solids */
1571
	if (sna_picture_is_solid(src, NULL))
1572
		return true;
3291 Serge 1573
 
4251 Serge 1574
	/* is the source picture only in cpu memory e.g. a shm pixmap? */
1575
	return picture_is_cpu(sna, src);
1576
}
3291 Serge 1577
 
4251 Serge 1578
static bool
1579
check_gradient(PicturePtr picture, bool precise)
1580
{
1581
	switch (picture->pSourcePict->type) {
1582
	case SourcePictTypeSolidFill:
1583
	case SourcePictTypeLinear:
1584
		return false;
1585
	default:
1586
		return precise;
1587
	}
1588
}
3291 Serge 1589
 
4251 Serge 1590
static bool
1591
has_alphamap(PicturePtr p)
1592
{
1593
	return p->alphaMap != NULL;
1594
}
3291 Serge 1595
 
4251 Serge 1596
static bool
1597
need_upload(struct sna *sna, PicturePtr p)
1598
{
1599
	return p->pDrawable && untransformed(p) &&
1600
		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
1601
}
3291 Serge 1602
 
4251 Serge 1603
static bool
1604
source_is_busy(PixmapPtr pixmap)
1605
{
1606
	struct sna_pixmap *priv = sna_pixmap(pixmap);
1607
	if (priv == NULL)
1608
		return false;
3291 Serge 1609
 
4251 Serge 1610
	if (priv->clear)
1611
		return false;
3291 Serge 1612
 
4251 Serge 1613
	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1614
		return true;
3291 Serge 1615
 
4251 Serge 1616
	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1617
		return true;
3291 Serge 1618
 
4251 Serge 1619
	return priv->gpu_damage && !priv->cpu_damage;
1620
}
3291 Serge 1621
 
4251 Serge 1622
static bool
1623
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
1624
{
1625
	if (sna_picture_is_solid(p, NULL))
1626
		return false;
3291 Serge 1627
 
4251 Serge 1628
	if (p->pSourcePict)
1629
		return check_gradient(p, precise);
3291 Serge 1630
 
4251 Serge 1631
	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
1632
		return true;
3291 Serge 1633
 
4251 Serge 1634
	/* soft errors: perfer to upload/compute rather than readback */
1635
	if (pixmap && source_is_busy(pixmap))
1636
		return false;
3291 Serge 1637
 
4251 Serge 1638
	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
1639
}
3291 Serge 1640
 
4251 Serge 1641
static bool
gen4_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen4_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the CPU.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However, if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}

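/* Where possible, let the mask channel alias the source channel so
 * that the texture is bound and sampled only once: either the mask is
 * literally the source, a solid colour, or the same drawable with a
 * compatible transform, repeat, filter and format.
 */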
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen4_check_repeat(mask))
		return false;

	if (!gen4_check_filter(mask))
		return false;

	if (!gen4_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen4_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen4_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

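/* The main composite entry point: try the BLT shortcut, check for a
 * CPU fallback, tile oversized operations, then prepare the target,
 * source and mask channels before picking a WM kernel and vertex
 * emitter and binding the surface state.
 */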
static bool
gen4_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	if (op >= ARRAY_SIZE(gen4_blend_op))
		return false;

	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen4_composite_fallback(sna, src, mask, dst))
		return false;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (!gen4_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       op > PictOpSrc || dst->pCompositeClip->data)) {
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
		return false;
	}

	tmp->op = op;
	switch (gen4_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: failed to prepare source\n", __FUNCTION__));
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		if (mask == NULL &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen4_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen4_blend_op[op].src_alpha &&
			    (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s -- fallback: unhandled component alpha blend\n",
					     __FUNCTION__));

					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen4_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: failed to prepare mask\n", __FUNCTION__));
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen4_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	tmp->u.gen4.wm_kernel =
		gen4_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->box   = gen4_render_composite_box;
	tmp->boxes = gen4_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
		tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
	}
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
	}

	gen4_bind_surfaces(sna, tmp);
	gen4_align_vertex(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}

#endif

static void
gen4_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
discard_vbo(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}

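/* kgem callbacks for recycling the vertex buffer: on retire we can
 * simply rewind an idle vbo, while on expire an unused vbo is dropped
 * and vertices fall back to the embedded vertex_data array.
 */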
static void
gen4_render_retire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}

static void
gen4_render_expire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}
}

static void gen4_render_reset(struct sna *sna)
{
	sna->render_state.gen4.needs_invariant = true;
	sna->render_state.gen4.needs_urb = true;
	sna->render_state.gen4.ve_id = -1;
	sna->render_state.gen4.last_primitive = -1;
	sna->render_state.gen4.last_pipelined_pointers = -1;

	sna->render_state.gen4.drawrect_offset = -1;
	sna->render_state.gen4.drawrect_limit = -1;
	sna->render_state.gen4.surface_table = -1;

	if (sna->render.vbo &&
	    !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}

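/* Fixed-function setup: the VS unit is programmed as a disabled
 * passthrough, so vertex data is simply copied through the URB for
 * the SF unit to consume.
 */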
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return sna_static_stream_offsetof(stream, vs);
}

static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
				     uint32_t kernel)
{
	struct gen4_sf_unit_state *sf;

	sf = sna_static_stream_map(stream, sizeof(*sf), 32);

	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer = kernel >> 6;
	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;
	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->sf5.viewport_transform = false;	/* skip viewport */
	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf7.trifan_pv = 2;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return sna_static_stream_offsetof(stream, sf);
}

static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *sampler_state;

	sampler_state = sna_static_stream_map(stream,
					      sizeof(struct gen4_sampler_state) * 2,
					      32);
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, sampler_state);
}

static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;

	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;

	wm->thread3.urb_entry_read_offset = 0;
	wm->thread3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	if (has_mask) {
		wm->thread1.binding_table_entry_count = 3;
		wm->thread3.urb_entry_read_length = 4;
	} else {
		wm->thread1.binding_table_entry_count = 2;
		wm->thread3.urb_entry_read_length = 2;
	}
}

static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *ptr, *base;
	int i, j;

	base = ptr =
		sna_static_stream_map(stream,
				      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				      64);

	for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
			struct gen4_cc_unit_state *state =
				(struct gen4_cc_unit_state *)ptr;

			state->cc3.blend_enable =
				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc;	/* COPY */
			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;

			/* Fill in alpha blend factors same as color, for the future. */
			state->cc5.ia_src_blend_factor = i;
			state->cc5.ia_dest_blend_factor = j;

			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = i;
			state->cc6.dest_blend_factor = j;

			ptr += 64;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}

static bool gen4_render_setup(struct sna *sna)
{
	struct gen4_render_state *state = &sna->render_state.gen4;
	struct sna_static_stream general;
	struct gen4_wm_unit_state_padded *wm_state;
	uint32_t sf, wm[KERNEL_COUNT];
	int i, j, k, l, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		}
	}

	state->vs = gen4_create_vs_unit_state(&general);
	state->sf = gen4_create_sf_state(&general, sf);

	/* One WM unit descriptor per (src filter, src extend, mask filter,
	 * mask extend, kernel) combination, laid out so the runtime lookup
	 * is a direct index into the table.
	 */
	wm_state = sna_static_stream_map(&general,
					 sizeof(*wm_state) * KERNEL_COUNT *
					 FILTER_COUNT * EXTEND_COUNT *
					 FILTER_COUNT * EXTEND_COUNT,
					 64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					uint32_t sampler_state;

					sampler_state =
						gen4_create_sampler_state(&general,
									  i, j,
									  k, l);

					for (m = 0; m < KERNEL_COUNT; m++) {
						gen4_init_wm_state(&wm_state->state,
								   sna->kgem.gen,
								   wm_kernels[m].has_mask,
								   wm[m], sampler_state);
						wm_state++;
					}
				}
			}
		}
	}

	state->cc = gen4_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

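/* In this KolibriOS port only the texture-blit entry point and the
 * housekeeping hooks are wired up; the composite/copy/fill paths
 * below remain compiled out under "#if 0".
 */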
const char *gen4_render_init(struct sna *sna, const char *backend)
{
	if (!gen4_render_setup(sna))
		return backend;

	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if 0
#if !NO_COMPOSITE
	sna->render.composite = gen4_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen4_check_composite_spans;
	sna->render.composite_spans = gen4_render_composite_spans;
	if (0)
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif

#if !NO_VIDEO
	sna->render.video = gen4_render_video;
#endif

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen4_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen4_render_fill_one;
#endif

#endif

	sna->render.blit_tex = gen4_blit_tex;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}

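/* KolibriOS-specific entry point: composite a texture through an a8
 * alpha mask with PictOpSrc. When "scale" is set, the source
 * coordinates are normalised against the requested width/height,
 * e.g. a 256x128 blit uses a scale of (1/256.f, 1/128.f); otherwise
 * they are normalised against the source pixmap extents.
 */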
static bool
gen4_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
//	tmp->src.offset[0] = -dst_x;
//	tmp->src.offset[1] = -dst_y;

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;
//	tmp->mask.offset[0] = -dst_x;
//	tmp->mask.offset[1] = -dst_y;

	tmp->u.gen4.wm_kernel = WM_KERNEL_MASK;
//	   gen4_choose_composite_kernel(tmp->op,
//					tmp->mask.bo != NULL,
//					tmp->has_component_alpha,
//					tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
	}

	gen4_bind_surfaces(sna, tmp);
	gen4_align_vertex(sna, tmp);
	return true;
}