Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright 2012 Advanced Micro Devices, Inc.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 * the Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *      Christian König 
25
 */
26
 
27
#include "si_pipe.h"
28
#include "si_shader.h"
29
#include "sid.h"
30
#include "radeon/r600_cs.h"
31
 
32
#include "util/u_format.h"
33
#include "util/u_format_s3tc.h"
34
#include "util/u_memory.h"
35
#include "util/u_pstipple.h"
36
 
37
static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
38
			 void (*emit)(struct si_context *ctx, struct r600_atom *state),
39
			 unsigned num_dw)
40
{
41
	atom->emit = (void*)emit;
42
	atom->num_dw = num_dw;
43
	atom->dirty = false;
44
	*list_elem = atom;
45
}
46
 
47
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
48
{
49
	if (sscreen->b.chip_class == CIK &&
50
	    sscreen->b.info.cik_macrotile_mode_array_valid) {
51
		unsigned index, tileb;
52
 
53
		tileb = 8 * 8 * tex->surface.bpe;
54
		tileb = MIN2(tex->surface.tile_split, tileb);
55
 
56
		for (index = 0; tileb > 64; index++) {
57
			tileb >>= 1;
58
		}
59
		assert(index < 16);
60
 
61
		return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3;
62
	}
63
 
64
	if (sscreen->b.chip_class == SI &&
65
	    sscreen->b.info.si_tile_mode_array_valid) {
66
		/* Don't use stencil_tiling_index, because num_banks is always
67
		 * read from the depth mode. */
68
		unsigned tile_mode_index = tex->surface.tiling_index[0];
69
		assert(tile_mode_index < 32);
70
 
71
		return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]);
72
	}
73
 
74
	/* The old way. */
75
	switch (sscreen->b.tiling_info.num_banks) {
76
	case 2:
77
		return V_02803C_ADDR_SURF_2_BANK;
78
	case 4:
79
		return V_02803C_ADDR_SURF_4_BANK;
80
	case 8:
81
	default:
82
		return V_02803C_ADDR_SURF_8_BANK;
83
	case 16:
84
		return V_02803C_ADDR_SURF_16_BANK;
85
	}
86
}
87
 
88
unsigned cik_tile_split(unsigned tile_split)
89
{
90
	switch (tile_split) {
91
	case 64:
92
		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
93
		break;
94
	case 128:
95
		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
96
		break;
97
	case 256:
98
		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
99
		break;
100
	case 512:
101
		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
102
		break;
103
	default:
104
	case 1024:
105
		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
106
		break;
107
	case 2048:
108
		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
109
		break;
110
	case 4096:
111
		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
112
		break;
113
	}
114
	return tile_split;
115
}
116
 
117
unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect)
118
{
119
	switch (macro_tile_aspect) {
120
	default:
121
	case 1:
122
		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
123
		break;
124
	case 2:
125
		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
126
		break;
127
	case 4:
128
		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
129
		break;
130
	case 8:
131
		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
132
		break;
133
	}
134
	return macro_tile_aspect;
135
}
136
 
137
unsigned cik_bank_wh(unsigned bankwh)
138
{
139
	switch (bankwh) {
140
	default:
141
	case 1:
142
		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
143
		break;
144
	case 2:
145
		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
146
		break;
147
	case 4:
148
		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
149
		break;
150
	case 8:
151
		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
152
		break;
153
	}
154
	return bankwh;
155
}
156
 
157
unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode)
158
{
159
	if (sscreen->b.info.si_tile_mode_array_valid) {
160
		uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode];
161
 
162
		return G_009910_PIPE_CONFIG(gb_tile_mode);
163
	}
164
 
165
	/* This is probably broken for a lot of chips, but it's only used
166
	 * if the kernel cannot return the tile mode array for CIK. */
167
	switch (sscreen->b.info.r600_num_tile_pipes) {
168
	case 16:
169
		return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
170
	case 8:
171
		return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
172
	case 4:
173
	default:
174
		if (sscreen->b.info.r600_num_backends == 4)
175
			return V_02803C_X_ADDR_SURF_P4_16X16;
176
		else
177
			return V_02803C_X_ADDR_SURF_P4_8X16;
178
	case 2:
179
		return V_02803C_ADDR_SURF_P2;
180
	}
181
}
182
 
183
static unsigned si_map_swizzle(unsigned swizzle)
184
{
185
	switch (swizzle) {
186
	case UTIL_FORMAT_SWIZZLE_Y:
187
		return V_008F0C_SQ_SEL_Y;
188
	case UTIL_FORMAT_SWIZZLE_Z:
189
		return V_008F0C_SQ_SEL_Z;
190
	case UTIL_FORMAT_SWIZZLE_W:
191
		return V_008F0C_SQ_SEL_W;
192
	case UTIL_FORMAT_SWIZZLE_0:
193
		return V_008F0C_SQ_SEL_0;
194
	case UTIL_FORMAT_SWIZZLE_1:
195
		return V_008F0C_SQ_SEL_1;
196
	default: /* UTIL_FORMAT_SWIZZLE_X */
197
		return V_008F0C_SQ_SEL_X;
198
	}
199
}
200
 
201
/*
 * Convert a float to fixed point with 'frac_bits' fractional bits by
 * scaling it with 2^frac_bits; the product is converted to uint32_t on
 * return.
 */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	const int scale = 1 << frac_bits;

	return value * scale;
}
205
 
206
/*
 * Pack a float into unsigned 12.4 fixed point.
 *
 * Values <= 0 become 0, values >= 4096 saturate to 0xffff, everything in
 * between is multiplied by 16 and truncated.
 */
static unsigned si_pack_float_12p4(float x)
{
	if (x <= 0)
		return 0;

	if (x >= 4096)
		return 0xffff;

	return x * 16;
}
212
 
213
/*
214
 * Inferred framebuffer and blender state.
215
 *
216
 * One of the reasons this must be derived from the framebuffer state is that:
217
 * - The blend state mask is 0xf most of the time.
218
 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
219
 *   so COLOR1 is enabled pretty much all the time.
220
 * So CB_TARGET_MASK is the only register that can disable COLOR1.
221
 */
222
static void si_update_fb_blend_state(struct si_context *sctx)
223
{
224
	struct si_pm4_state *pm4;
225
	struct si_state_blend *blend = sctx->queued.named.blend;
226
	uint32_t mask = 0, i;
227
 
228
	if (blend == NULL)
229
		return;
230
 
231
	pm4 = CALLOC_STRUCT(si_pm4_state);
232
	if (pm4 == NULL)
233
		return;
234
 
235
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
236
		if (sctx->framebuffer.state.cbufs[i])
237
			mask |= 0xf << (4*i);
238
	mask &= blend->cb_target_mask;
239
 
240
	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
241
	si_pm4_set_state(sctx, fb_blend, pm4);
242
}
243
 
244
/*
245
 * Blender functions
246
 */
247
 
248
static uint32_t si_translate_blend_function(int blend_func)
249
{
250
	switch (blend_func) {
251
	case PIPE_BLEND_ADD:
252
		return V_028780_COMB_DST_PLUS_SRC;
253
	case PIPE_BLEND_SUBTRACT:
254
		return V_028780_COMB_SRC_MINUS_DST;
255
	case PIPE_BLEND_REVERSE_SUBTRACT:
256
		return V_028780_COMB_DST_MINUS_SRC;
257
	case PIPE_BLEND_MIN:
258
		return V_028780_COMB_MIN_DST_SRC;
259
	case PIPE_BLEND_MAX:
260
		return V_028780_COMB_MAX_DST_SRC;
261
	default:
262
		R600_ERR("Unknown blend function %d\n", blend_func);
263
		assert(0);
264
		break;
265
	}
266
	return 0;
267
}
268
 
269
static uint32_t si_translate_blend_factor(int blend_fact)
270
{
271
	switch (blend_fact) {
272
	case PIPE_BLENDFACTOR_ONE:
273
		return V_028780_BLEND_ONE;
274
	case PIPE_BLENDFACTOR_SRC_COLOR:
275
		return V_028780_BLEND_SRC_COLOR;
276
	case PIPE_BLENDFACTOR_SRC_ALPHA:
277
		return V_028780_BLEND_SRC_ALPHA;
278
	case PIPE_BLENDFACTOR_DST_ALPHA:
279
		return V_028780_BLEND_DST_ALPHA;
280
	case PIPE_BLENDFACTOR_DST_COLOR:
281
		return V_028780_BLEND_DST_COLOR;
282
	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
283
		return V_028780_BLEND_SRC_ALPHA_SATURATE;
284
	case PIPE_BLENDFACTOR_CONST_COLOR:
285
		return V_028780_BLEND_CONSTANT_COLOR;
286
	case PIPE_BLENDFACTOR_CONST_ALPHA:
287
		return V_028780_BLEND_CONSTANT_ALPHA;
288
	case PIPE_BLENDFACTOR_ZERO:
289
		return V_028780_BLEND_ZERO;
290
	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
291
		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
292
	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
293
		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
294
	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
295
		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
296
	case PIPE_BLENDFACTOR_INV_DST_COLOR:
297
		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
298
	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
299
		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
300
	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
301
		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
302
	case PIPE_BLENDFACTOR_SRC1_COLOR:
303
		return V_028780_BLEND_SRC1_COLOR;
304
	case PIPE_BLENDFACTOR_SRC1_ALPHA:
305
		return V_028780_BLEND_SRC1_ALPHA;
306
	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
307
		return V_028780_BLEND_INV_SRC1_COLOR;
308
	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
309
		return V_028780_BLEND_INV_SRC1_ALPHA;
310
	default:
311
		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
312
		assert(0);
313
		break;
314
	}
315
	return 0;
316
}
317
 
318
static void *si_create_blend_state_mode(struct pipe_context *ctx,
319
					const struct pipe_blend_state *state,
320
					unsigned mode)
321
{
322
	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
323
	struct si_pm4_state *pm4 = &blend->pm4;
324
 
325
	uint32_t color_control = 0;
326
 
327
	if (blend == NULL)
328
		return NULL;
329
 
330
	blend->alpha_to_one = state->alpha_to_one;
331
 
332
	if (state->logicop_enable) {
333
		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
334
	} else {
335
		color_control |= S_028808_ROP3(0xcc);
336
	}
337
 
338
	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
339
		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
340
		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
341
		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
342
		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
343
		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
344
 
345
	blend->cb_target_mask = 0;
346
	for (int i = 0; i < 8; i++) {
347
		/* state->rt entries > 0 only written if independent blending */
348
		const int j = state->independent_blend_enable ? i : 0;
349
 
350
		unsigned eqRGB = state->rt[j].rgb_func;
351
		unsigned srcRGB = state->rt[j].rgb_src_factor;
352
		unsigned dstRGB = state->rt[j].rgb_dst_factor;
353
		unsigned eqA = state->rt[j].alpha_func;
354
		unsigned srcA = state->rt[j].alpha_src_factor;
355
		unsigned dstA = state->rt[j].alpha_dst_factor;
356
 
357
		unsigned blend_cntl = 0;
358
 
359
		/* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
360
		blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
361
 
362
		if (!state->rt[j].blend_enable) {
363
			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
364
			continue;
365
		}
366
 
367
		blend_cntl |= S_028780_ENABLE(1);
368
		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
369
		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
370
		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
371
 
372
		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
373
			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
374
			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
375
			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
376
			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
377
		}
378
		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
379
	}
380
 
381
	if (blend->cb_target_mask) {
382
		color_control |= S_028808_MODE(mode);
383
	} else {
384
		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
385
	}
386
	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
387
 
388
	return blend;
389
}
390
 
391
static void *si_create_blend_state(struct pipe_context *ctx,
392
				   const struct pipe_blend_state *state)
393
{
394
	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
395
}
396
 
397
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
398
{
399
	struct si_context *sctx = (struct si_context *)ctx;
400
	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
401
	si_update_fb_blend_state(sctx);
402
}
403
 
404
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
405
{
406
	struct si_context *sctx = (struct si_context *)ctx;
407
	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
408
}
409
 
410
static void si_set_blend_color(struct pipe_context *ctx,
411
			       const struct pipe_blend_color *state)
412
{
413
	struct si_context *sctx = (struct si_context *)ctx;
414
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
415
 
416
        if (pm4 == NULL)
417
                return;
418
 
419
	si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
420
	si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
421
	si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
422
	si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
423
 
424
	si_pm4_set_state(sctx, blend_color, pm4);
425
}
426
 
427
/*
428
 * Clipping, scissors and viewport
429
 */
430
 
431
static void si_set_clip_state(struct pipe_context *ctx,
432
			      const struct pipe_clip_state *state)
433
{
434
	struct si_context *sctx = (struct si_context *)ctx;
435
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
436
	struct pipe_constant_buffer cb;
437
 
438
	if (pm4 == NULL)
439
		return;
440
 
441
	for (int i = 0; i < 6; i++) {
442
		si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
443
			       fui(state->ucp[i][0]));
444
		si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
445
			       fui(state->ucp[i][1]));
446
		si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
447
			       fui(state->ucp[i][2]));
448
		si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
449
			       fui(state->ucp[i][3]));
450
        }
451
 
452
	cb.buffer = NULL;
453
	cb.user_buffer = state->ucp;
454
	cb.buffer_offset = 0;
455
	cb.buffer_size = 4*4*8;
456
	ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
457
	pipe_resource_reference(&cb.buffer, NULL);
458
 
459
	si_pm4_set_state(sctx, clip, pm4);
460
}
461
 
462
#define SIX_BITS 0x3F
463
 
464
static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
465
{
466
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
467
	struct tgsi_shader_info *info = si_get_vs_info(sctx);
468
	unsigned window_space =
469
	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
470
	unsigned clipdist_mask =
471
		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
472
 
473
	r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
474
		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
475
		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
476
		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
477
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
478
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
479
		S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
480
					    info->writes_edgeflag ||
481
					    info->writes_layer) |
482
		(sctx->queued.named.rasterizer->clip_plane_enable &
483
		 clipdist_mask));
484
	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
485
		sctx->queued.named.rasterizer->pa_cl_clip_cntl |
486
		(clipdist_mask ? 0 :
487
		 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
488
		S_028810_CLIP_DISABLE(window_space));
489
}
490
 
491
static void si_set_scissor_states(struct pipe_context *ctx,
492
                                  unsigned start_slot,
493
                                  unsigned num_scissors,
494
                                  const struct pipe_scissor_state *state)
495
{
496
	struct si_context *sctx = (struct si_context *)ctx;
497
	struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
498
	struct si_pm4_state *pm4 = &scissor->pm4;
499
 
500
	if (scissor == NULL)
501
		return;
502
 
503
	scissor->scissor = *state;
504
	si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
505
		       S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
506
		       S_028250_WINDOW_OFFSET_DISABLE(1));
507
	si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
508
		       S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
509
 
510
	si_pm4_set_state(sctx, scissor, scissor);
511
}
512
 
513
static void si_set_viewport_states(struct pipe_context *ctx,
514
                                   unsigned start_slot,
515
                                   unsigned num_viewports,
516
                                   const struct pipe_viewport_state *state)
517
{
518
	struct si_context *sctx = (struct si_context *)ctx;
519
	struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
520
	struct si_pm4_state *pm4 = &viewport->pm4;
521
 
522
	if (viewport == NULL)
523
		return;
524
 
525
	viewport->viewport = *state;
526
	si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
527
	si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
528
	si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
529
	si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
530
	si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
531
	si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
532
 
533
	si_pm4_set_state(sctx, viewport, viewport);
534
}
535
 
536
/*
537
 * inferred state between framebuffer and rasterizer
538
 */
539
static void si_update_fb_rs_state(struct si_context *sctx)
540
{
541
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
542
	struct si_pm4_state *pm4;
543
	float offset_units;
544
 
545
	if (!rs || !sctx->framebuffer.state.zsbuf)
546
		return;
547
 
548
	offset_units = sctx->queued.named.rasterizer->offset_units;
549
	switch (sctx->framebuffer.state.zsbuf->texture->format) {
550
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
551
	case PIPE_FORMAT_X8Z24_UNORM:
552
	case PIPE_FORMAT_Z24X8_UNORM:
553
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
554
		offset_units *= 2.0f;
555
		break;
556
	case PIPE_FORMAT_Z32_FLOAT:
557
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
558
		offset_units *= 1.0f;
559
		break;
560
	case PIPE_FORMAT_Z16_UNORM:
561
		offset_units *= 4.0f;
562
		break;
563
	default:
564
		return;
565
	}
566
 
567
	pm4 = CALLOC_STRUCT(si_pm4_state);
568
 
569
	if (pm4 == NULL)
570
		return;
571
 
572
	/* FIXME some of those reg can be computed with cso */
573
	si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
574
		       fui(sctx->queued.named.rasterizer->offset_scale));
575
	si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
576
	si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
577
		       fui(sctx->queued.named.rasterizer->offset_scale));
578
	si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
579
 
580
	si_pm4_set_state(sctx, fb_rs, pm4);
581
}
582
 
583
/*
584
 * Rasterizer
585
 */
586
 
587
static uint32_t si_translate_fill(uint32_t func)
588
{
589
	switch(func) {
590
	case PIPE_POLYGON_MODE_FILL:
591
		return V_028814_X_DRAW_TRIANGLES;
592
	case PIPE_POLYGON_MODE_LINE:
593
		return V_028814_X_DRAW_LINES;
594
	case PIPE_POLYGON_MODE_POINT:
595
		return V_028814_X_DRAW_POINTS;
596
	default:
597
		assert(0);
598
		return V_028814_X_DRAW_POINTS;
599
	}
600
}
601
 
602
static void *si_create_rs_state(struct pipe_context *ctx,
603
				const struct pipe_rasterizer_state *state)
604
{
605
	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
606
	struct si_pm4_state *pm4 = &rs->pm4;
607
	unsigned tmp;
608
	float psize_min, psize_max;
609
 
610
	if (rs == NULL) {
611
		return NULL;
612
	}
613
 
614
	rs->two_side = state->light_twoside;
615
	rs->multisample_enable = state->multisample;
616
	rs->clip_plane_enable = state->clip_plane_enable;
617
	rs->line_stipple_enable = state->line_stipple_enable;
618
	rs->poly_stipple_enable = state->poly_stipple_enable;
619
	rs->line_smooth = state->line_smooth;
620
	rs->poly_smooth = state->poly_smooth;
621
 
622
	rs->flatshade = state->flatshade;
623
	rs->sprite_coord_enable = state->sprite_coord_enable;
624
	rs->pa_sc_line_stipple = state->line_stipple_enable ?
625
				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
626
				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
627
	rs->pa_cl_clip_cntl =
628
		S_028810_PS_UCP_MODE(3) |
629
		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
630
		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
631
		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
632
		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
633
		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
634
 
635
	/* offset */
636
	rs->offset_units = state->offset_units;
637
	rs->offset_scale = state->offset_scale * 12.0f;
638
 
639
	tmp = S_0286D4_FLAT_SHADE_ENA(1);
640
	if (state->sprite_coord_enable) {
641
		tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
642
			S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
643
			S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
644
			S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
645
			S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
646
		if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
647
			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
648
		}
649
	}
650
	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
651
 
652
	/* point size 12.4 fixed point */
653
	tmp = (unsigned)(state->point_size * 8.0);
654
	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
655
 
656
	if (state->point_size_per_vertex) {
657
		psize_min = util_get_min_point_size(state);
658
		psize_max = 8192;
659
	} else {
660
		/* Force the point size to be as if the vertex output was disabled. */
661
		psize_min = state->point_size;
662
		psize_max = state->point_size;
663
	}
664
	/* Divide by two, because 0.5 = 1 pixel. */
665
	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
666
			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
667
			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
668
 
669
	tmp = (unsigned)state->line_width * 8;
670
	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
671
	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
672
		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
673
		       S_028A48_MSAA_ENABLE(state->multisample ||
674
					    state->poly_smooth ||
675
					    state->line_smooth) |
676
		       S_028A48_VPORT_SCISSOR_ENABLE(state->scissor));
677
 
678
	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
679
		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
680
		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
681
 
682
	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
683
	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
684
		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
685
		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
686
		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
687
		S_028814_FACE(!state->front_ccw) |
688
		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
689
		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
690
		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
691
		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
692
				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
693
		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
694
		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
695
	return rs;
696
}
697
 
698
static void si_bind_rs_state(struct pipe_context *ctx, void *state)
699
{
700
	struct si_context *sctx = (struct si_context *)ctx;
701
	struct si_state_rasterizer *old_rs =
702
		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
703
	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
704
 
705
	if (state == NULL)
706
		return;
707
 
708
	if (sctx->framebuffer.nr_samples > 1 &&
709
	    (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
710
		sctx->db_render_state.dirty = true;
711
 
712
	si_pm4_bind_state(sctx, rasterizer, rs);
713
	si_update_fb_rs_state(sctx);
714
 
715
	sctx->clip_regs.dirty = true;
716
}
717
 
718
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
719
{
720
	struct si_context *sctx = (struct si_context *)ctx;
721
	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
722
}
723
 
724
/*
725
 * inferred state between dsa and stencil ref
726
 */
727
static void si_update_dsa_stencil_ref(struct si_context *sctx)
728
{
729
	struct si_pm4_state *pm4;
730
	struct pipe_stencil_ref *ref = &sctx->stencil_ref;
731
	struct si_state_dsa *dsa = sctx->queued.named.dsa;
732
 
733
	if (!dsa)
734
		return;
735
 
736
	pm4 = CALLOC_STRUCT(si_pm4_state);
737
	if (pm4 == NULL)
738
		return;
739
 
740
	si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
741
		       S_028430_STENCILTESTVAL(ref->ref_value[0]) |
742
		       S_028430_STENCILMASK(dsa->valuemask[0]) |
743
		       S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
744
		       S_028430_STENCILOPVAL(1));
745
	si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
746
		       S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
747
		       S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
748
		       S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
749
		       S_028434_STENCILOPVAL_BF(1));
750
 
751
	si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
752
}
753
 
754
static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
755
				    const struct pipe_stencil_ref *state)
756
{
757
        struct si_context *sctx = (struct si_context *)ctx;
758
        sctx->stencil_ref = *state;
759
	si_update_dsa_stencil_ref(sctx);
760
}
761
 
762
 
763
/*
764
 * DSA
765
 */
766
 
767
static uint32_t si_translate_stencil_op(int s_op)
768
{
769
	switch (s_op) {
770
	case PIPE_STENCIL_OP_KEEP:
771
		return V_02842C_STENCIL_KEEP;
772
	case PIPE_STENCIL_OP_ZERO:
773
		return V_02842C_STENCIL_ZERO;
774
	case PIPE_STENCIL_OP_REPLACE:
775
		return V_02842C_STENCIL_REPLACE_TEST;
776
	case PIPE_STENCIL_OP_INCR:
777
		return V_02842C_STENCIL_ADD_CLAMP;
778
	case PIPE_STENCIL_OP_DECR:
779
		return V_02842C_STENCIL_SUB_CLAMP;
780
	case PIPE_STENCIL_OP_INCR_WRAP:
781
		return V_02842C_STENCIL_ADD_WRAP;
782
	case PIPE_STENCIL_OP_DECR_WRAP:
783
		return V_02842C_STENCIL_SUB_WRAP;
784
	case PIPE_STENCIL_OP_INVERT:
785
		return V_02842C_STENCIL_INVERT;
786
	default:
787
		R600_ERR("Unknown stencil op %d", s_op);
788
		assert(0);
789
		break;
790
	}
791
	return 0;
792
}
793
 
794
static void *si_create_dsa_state(struct pipe_context *ctx,
795
				 const struct pipe_depth_stencil_alpha_state *state)
796
{
797
	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
798
	struct si_pm4_state *pm4 = &dsa->pm4;
799
	unsigned db_depth_control;
800
	uint32_t db_stencil_control = 0;
801
 
802
	if (dsa == NULL) {
803
		return NULL;
804
	}
805
 
806
	dsa->valuemask[0] = state->stencil[0].valuemask;
807
	dsa->valuemask[1] = state->stencil[1].valuemask;
808
	dsa->writemask[0] = state->stencil[0].writemask;
809
	dsa->writemask[1] = state->stencil[1].writemask;
810
 
811
	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
812
		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
813
		S_028800_ZFUNC(state->depth.func);
814
 
815
	/* stencil */
816
	if (state->stencil[0].enabled) {
817
		db_depth_control |= S_028800_STENCIL_ENABLE(1);
818
		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
819
		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
820
		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
821
		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
822
 
823
		if (state->stencil[1].enabled) {
824
			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
825
			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
826
			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
827
			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
828
			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
829
		}
830
	}
831
 
832
	/* alpha */
833
	if (state->alpha.enabled) {
834
		dsa->alpha_func = state->alpha.func;
835
 
836
		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
837
		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
838
	} else {
839
		dsa->alpha_func = PIPE_FUNC_ALWAYS;
840
	}
841
 
842
	/* misc */
843
	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
844
	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
845
 
846
	return dsa;
847
}
848
 
849
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
850
{
851
        struct si_context *sctx = (struct si_context *)ctx;
852
        struct si_state_dsa *dsa = state;
853
 
854
        if (state == NULL)
855
                return;
856
 
857
	si_pm4_bind_state(sctx, dsa, dsa);
858
	si_update_dsa_stencil_ref(sctx);
859
}
860
 
861
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
862
{
863
	struct si_context *sctx = (struct si_context *)ctx;
864
	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
865
}
866
 
867
static void *si_create_db_flush_dsa(struct si_context *sctx)
868
{
869
	struct pipe_depth_stencil_alpha_state dsa = {};
870
 
871
	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
872
}
873
 
874
/* DB RENDER STATE */
875
 
876
static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
877
{
878
	struct si_context *sctx = (struct si_context*)ctx;
879
 
880
	sctx->db_render_state.dirty = true;
881
}
882
 
883
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
884
{
885
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
886
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
887
	unsigned db_shader_control;
888
 
889
	r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
890
 
891
	/* DB_RENDER_CONTROL */
892
	if (sctx->dbcb_depth_copy_enabled ||
893
	    sctx->dbcb_stencil_copy_enabled) {
894
		radeon_emit(cs,
895
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
896
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
897
			    S_028000_COPY_CENTROID(1) |
898
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
899
	} else if (sctx->db_inplace_flush_enabled) {
900
		radeon_emit(cs,
901
			    S_028000_DEPTH_COMPRESS_DISABLE(1) |
902
			    S_028000_STENCIL_COMPRESS_DISABLE(1));
903
	} else if (sctx->db_depth_clear) {
904
		radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
905
	} else {
906
		radeon_emit(cs, 0);
907
	}
908
 
909
	/* DB_COUNT_CONTROL (occlusion queries) */
910
	if (sctx->b.num_occlusion_queries > 0) {
911
		if (sctx->b.chip_class >= CIK) {
912
			radeon_emit(cs,
913
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
914
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
915
				    S_028004_ZPASS_ENABLE(1) |
916
				    S_028004_SLICE_EVEN_ENABLE(1) |
917
				    S_028004_SLICE_ODD_ENABLE(1));
918
		} else {
919
			radeon_emit(cs,
920
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
921
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
922
		}
923
	} else {
924
		/* Disable occlusion queries. */
925
		if (sctx->b.chip_class >= CIK) {
926
			radeon_emit(cs, 0);
927
		} else {
928
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
929
		}
930
	}
931
 
932
	/* DB_RENDER_OVERRIDE2 */
933
	if (sctx->db_depth_disable_expclear) {
934
		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
935
			S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
936
	} else {
937
		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
938
	}
939
 
940
	db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
941
		            sctx->ps_db_shader_control;
942
 
943
	/* Bug workaround for smoothing (overrasterization) on SI. */
944
	if (sctx->b.chip_class == SI && sctx->smoothing_enabled)
945
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
946
	else
947
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
948
 
949
	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
950
	if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
951
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
952
 
953
	r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
954
			       db_shader_control);
955
}
956
 
957
/*
958
 * format translation
959
 */
960
static uint32_t si_translate_colorformat(enum pipe_format format)
961
{
962
	const struct util_format_description *desc = util_format_description(format);
963
 
964
#define HAS_SIZE(x,y,z,w) \
965
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
966
         desc->channel[2].size == (z) && desc->channel[3].size == (w))
967
 
968
	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
969
		return V_028C70_COLOR_10_11_11;
970
 
971
	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
972
		return V_028C70_COLOR_INVALID;
973
 
974
	switch (desc->nr_channels) {
975
	case 1:
976
		switch (desc->channel[0].size) {
977
		case 8:
978
			return V_028C70_COLOR_8;
979
		case 16:
980
			return V_028C70_COLOR_16;
981
		case 32:
982
			return V_028C70_COLOR_32;
983
		}
984
		break;
985
	case 2:
986
		if (desc->channel[0].size == desc->channel[1].size) {
987
			switch (desc->channel[0].size) {
988
			case 8:
989
				return V_028C70_COLOR_8_8;
990
			case 16:
991
				return V_028C70_COLOR_16_16;
992
			case 32:
993
				return V_028C70_COLOR_32_32;
994
			}
995
		} else if (HAS_SIZE(8,24,0,0)) {
996
			return V_028C70_COLOR_24_8;
997
		} else if (HAS_SIZE(24,8,0,0)) {
998
			return V_028C70_COLOR_8_24;
999
		}
1000
		break;
1001
	case 3:
1002
		if (HAS_SIZE(5,6,5,0)) {
1003
			return V_028C70_COLOR_5_6_5;
1004
		} else if (HAS_SIZE(32,8,24,0)) {
1005
			return V_028C70_COLOR_X24_8_32_FLOAT;
1006
		}
1007
		break;
1008
	case 4:
1009
		if (desc->channel[0].size == desc->channel[1].size &&
1010
		    desc->channel[0].size == desc->channel[2].size &&
1011
		    desc->channel[0].size == desc->channel[3].size) {
1012
			switch (desc->channel[0].size) {
1013
			case 4:
1014
				return V_028C70_COLOR_4_4_4_4;
1015
			case 8:
1016
				return V_028C70_COLOR_8_8_8_8;
1017
			case 16:
1018
				return V_028C70_COLOR_16_16_16_16;
1019
			case 32:
1020
				return V_028C70_COLOR_32_32_32_32;
1021
			}
1022
		} else if (HAS_SIZE(5,5,5,1)) {
1023
			return V_028C70_COLOR_1_5_5_5;
1024
		} else if (HAS_SIZE(10,10,10,2)) {
1025
			return V_028C70_COLOR_2_10_10_10;
1026
		}
1027
		break;
1028
	}
1029
	return V_028C70_COLOR_INVALID;
1030
}
1031
 
1032
static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1033
{
1034
	if (SI_BIG_ENDIAN) {
1035
		switch(colorformat) {
1036
		/* 8-bit buffers. */
1037
		case V_028C70_COLOR_8:
1038
			return V_028C70_ENDIAN_NONE;
1039
 
1040
		/* 16-bit buffers. */
1041
		case V_028C70_COLOR_5_6_5:
1042
		case V_028C70_COLOR_1_5_5_5:
1043
		case V_028C70_COLOR_4_4_4_4:
1044
		case V_028C70_COLOR_16:
1045
		case V_028C70_COLOR_8_8:
1046
			return V_028C70_ENDIAN_8IN16;
1047
 
1048
		/* 32-bit buffers. */
1049
		case V_028C70_COLOR_8_8_8_8:
1050
		case V_028C70_COLOR_2_10_10_10:
1051
		case V_028C70_COLOR_8_24:
1052
		case V_028C70_COLOR_24_8:
1053
		case V_028C70_COLOR_16_16:
1054
			return V_028C70_ENDIAN_8IN32;
1055
 
1056
		/* 64-bit buffers. */
1057
		case V_028C70_COLOR_16_16_16_16:
1058
			return V_028C70_ENDIAN_8IN16;
1059
 
1060
		case V_028C70_COLOR_32_32:
1061
			return V_028C70_ENDIAN_8IN32;
1062
 
1063
		/* 128-bit buffers. */
1064
		case V_028C70_COLOR_32_32_32_32:
1065
			return V_028C70_ENDIAN_8IN32;
1066
		default:
1067
			return V_028C70_ENDIAN_NONE; /* Unsupported. */
1068
		}
1069
	} else {
1070
		return V_028C70_ENDIAN_NONE;
1071
	}
1072
}
1073
 
1074
/* Returns the size in bits of the widest component of a CB format */
1075
static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
1076
{
1077
	switch(colorformat) {
1078
	case V_028C70_COLOR_4_4_4_4:
1079
		return 4;
1080
 
1081
	case V_028C70_COLOR_1_5_5_5:
1082
	case V_028C70_COLOR_5_5_5_1:
1083
		return 5;
1084
 
1085
	case V_028C70_COLOR_5_6_5:
1086
		return 6;
1087
 
1088
	case V_028C70_COLOR_8:
1089
	case V_028C70_COLOR_8_8:
1090
	case V_028C70_COLOR_8_8_8_8:
1091
		return 8;
1092
 
1093
	case V_028C70_COLOR_10_10_10_2:
1094
	case V_028C70_COLOR_2_10_10_10:
1095
		return 10;
1096
 
1097
	case V_028C70_COLOR_10_11_11:
1098
	case V_028C70_COLOR_11_11_10:
1099
		return 11;
1100
 
1101
	case V_028C70_COLOR_16:
1102
	case V_028C70_COLOR_16_16:
1103
	case V_028C70_COLOR_16_16_16_16:
1104
		return 16;
1105
 
1106
	case V_028C70_COLOR_8_24:
1107
	case V_028C70_COLOR_24_8:
1108
		return 24;
1109
 
1110
	case V_028C70_COLOR_32:
1111
	case V_028C70_COLOR_32_32:
1112
	case V_028C70_COLOR_32_32_32_32:
1113
	case V_028C70_COLOR_X24_8_32_FLOAT:
1114
		return 32;
1115
	}
1116
 
1117
	assert(!"Unknown maximum component size");
1118
	return 0;
1119
}
1120
 
1121
static uint32_t si_translate_dbformat(enum pipe_format format)
1122
{
1123
	switch (format) {
1124
	case PIPE_FORMAT_Z16_UNORM:
1125
		return V_028040_Z_16;
1126
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1127
	case PIPE_FORMAT_X8Z24_UNORM:
1128
	case PIPE_FORMAT_Z24X8_UNORM:
1129
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1130
		return V_028040_Z_24; /* deprecated on SI */
1131
	case PIPE_FORMAT_Z32_FLOAT:
1132
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1133
		return V_028040_Z_32_FLOAT;
1134
	default:
1135
		return V_028040_Z_INVALID;
1136
	}
1137
}
1138
 
1139
/*
1140
 * Texture translation
1141
 */
1142
 
1143
static uint32_t si_translate_texformat(struct pipe_screen *screen,
1144
				       enum pipe_format format,
1145
				       const struct util_format_description *desc,
1146
				       int first_non_void)
1147
{
1148
	struct si_screen *sscreen = (struct si_screen*)screen;
1149
	bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
1150
	boolean uniform = TRUE;
1151
	int i;
1152
 
1153
	/* Colorspace (return non-RGB formats directly). */
1154
	switch (desc->colorspace) {
1155
	/* Depth stencil formats */
1156
	case UTIL_FORMAT_COLORSPACE_ZS:
1157
		switch (format) {
1158
		case PIPE_FORMAT_Z16_UNORM:
1159
			return V_008F14_IMG_DATA_FORMAT_16;
1160
		case PIPE_FORMAT_X24S8_UINT:
1161
		case PIPE_FORMAT_Z24X8_UNORM:
1162
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1163
			return V_008F14_IMG_DATA_FORMAT_8_24;
1164
		case PIPE_FORMAT_X8Z24_UNORM:
1165
		case PIPE_FORMAT_S8X24_UINT:
1166
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1167
			return V_008F14_IMG_DATA_FORMAT_24_8;
1168
		case PIPE_FORMAT_S8_UINT:
1169
			return V_008F14_IMG_DATA_FORMAT_8;
1170
		case PIPE_FORMAT_Z32_FLOAT:
1171
			return V_008F14_IMG_DATA_FORMAT_32;
1172
		case PIPE_FORMAT_X32_S8X24_UINT:
1173
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1174
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1175
		default:
1176
			goto out_unknown;
1177
		}
1178
 
1179
	case UTIL_FORMAT_COLORSPACE_YUV:
1180
		goto out_unknown; /* TODO */
1181
 
1182
	case UTIL_FORMAT_COLORSPACE_SRGB:
1183
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
1184
			goto out_unknown;
1185
		break;
1186
 
1187
	default:
1188
		break;
1189
	}
1190
 
1191
	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1192
		if (!enable_s3tc)
1193
			goto out_unknown;
1194
 
1195
		switch (format) {
1196
		case PIPE_FORMAT_RGTC1_SNORM:
1197
		case PIPE_FORMAT_LATC1_SNORM:
1198
		case PIPE_FORMAT_RGTC1_UNORM:
1199
		case PIPE_FORMAT_LATC1_UNORM:
1200
			return V_008F14_IMG_DATA_FORMAT_BC4;
1201
		case PIPE_FORMAT_RGTC2_SNORM:
1202
		case PIPE_FORMAT_LATC2_SNORM:
1203
		case PIPE_FORMAT_RGTC2_UNORM:
1204
		case PIPE_FORMAT_LATC2_UNORM:
1205
			return V_008F14_IMG_DATA_FORMAT_BC5;
1206
		default:
1207
			goto out_unknown;
1208
		}
1209
	}
1210
 
1211
	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
1212
		if (!enable_s3tc)
1213
			goto out_unknown;
1214
 
1215
		switch (format) {
1216
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
1217
		case PIPE_FORMAT_BPTC_SRGBA:
1218
			return V_008F14_IMG_DATA_FORMAT_BC7;
1219
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
1220
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
1221
			return V_008F14_IMG_DATA_FORMAT_BC6;
1222
		default:
1223
			goto out_unknown;
1224
		}
1225
	}
1226
 
1227
	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
1228
		switch (format) {
1229
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
1230
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
1231
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
1232
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
1233
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
1234
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
1235
		default:
1236
			goto out_unknown;
1237
		}
1238
	}
1239
 
1240
	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1241
 
1242
		if (!enable_s3tc)
1243
			goto out_unknown;
1244
 
1245
		if (!util_format_s3tc_enabled) {
1246
			goto out_unknown;
1247
		}
1248
 
1249
		switch (format) {
1250
		case PIPE_FORMAT_DXT1_RGB:
1251
		case PIPE_FORMAT_DXT1_RGBA:
1252
		case PIPE_FORMAT_DXT1_SRGB:
1253
		case PIPE_FORMAT_DXT1_SRGBA:
1254
			return V_008F14_IMG_DATA_FORMAT_BC1;
1255
		case PIPE_FORMAT_DXT3_RGBA:
1256
		case PIPE_FORMAT_DXT3_SRGBA:
1257
			return V_008F14_IMG_DATA_FORMAT_BC2;
1258
		case PIPE_FORMAT_DXT5_RGBA:
1259
		case PIPE_FORMAT_DXT5_SRGBA:
1260
			return V_008F14_IMG_DATA_FORMAT_BC3;
1261
		default:
1262
			goto out_unknown;
1263
		}
1264
	}
1265
 
1266
	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1267
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1268
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1269
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
1270
	}
1271
 
1272
	/* R8G8Bx_SNORM - TODO CxV8U8 */
1273
 
1274
	/* See whether the components are of the same size. */
1275
	for (i = 1; i < desc->nr_channels; i++) {
1276
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1277
	}
1278
 
1279
	/* Non-uniform formats. */
1280
	if (!uniform) {
1281
		switch(desc->nr_channels) {
1282
		case 3:
1283
			if (desc->channel[0].size == 5 &&
1284
			    desc->channel[1].size == 6 &&
1285
			    desc->channel[2].size == 5) {
1286
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
1287
			}
1288
			goto out_unknown;
1289
		case 4:
1290
			if (desc->channel[0].size == 5 &&
1291
			    desc->channel[1].size == 5 &&
1292
			    desc->channel[2].size == 5 &&
1293
			    desc->channel[3].size == 1) {
1294
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1295
			}
1296
			if (desc->channel[0].size == 10 &&
1297
			    desc->channel[1].size == 10 &&
1298
			    desc->channel[2].size == 10 &&
1299
			    desc->channel[3].size == 2) {
1300
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1301
			}
1302
			goto out_unknown;
1303
		}
1304
		goto out_unknown;
1305
	}
1306
 
1307
	if (first_non_void < 0 || first_non_void > 3)
1308
		goto out_unknown;
1309
 
1310
	/* uniform formats */
1311
	switch (desc->channel[first_non_void].size) {
1312
	case 4:
1313
		switch (desc->nr_channels) {
1314
#if 0 /* Not supported for render targets */
1315
		case 2:
1316
			return V_008F14_IMG_DATA_FORMAT_4_4;
1317
#endif
1318
		case 4:
1319
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1320
		}
1321
		break;
1322
	case 8:
1323
		switch (desc->nr_channels) {
1324
		case 1:
1325
			return V_008F14_IMG_DATA_FORMAT_8;
1326
		case 2:
1327
			return V_008F14_IMG_DATA_FORMAT_8_8;
1328
		case 4:
1329
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1330
		}
1331
		break;
1332
	case 16:
1333
		switch (desc->nr_channels) {
1334
		case 1:
1335
			return V_008F14_IMG_DATA_FORMAT_16;
1336
		case 2:
1337
			return V_008F14_IMG_DATA_FORMAT_16_16;
1338
		case 4:
1339
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1340
		}
1341
		break;
1342
	case 32:
1343
		switch (desc->nr_channels) {
1344
		case 1:
1345
			return V_008F14_IMG_DATA_FORMAT_32;
1346
		case 2:
1347
			return V_008F14_IMG_DATA_FORMAT_32_32;
1348
#if 0 /* Not supported for render targets */
1349
		case 3:
1350
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
1351
#endif
1352
		case 4:
1353
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1354
		}
1355
	}
1356
 
1357
out_unknown:
1358
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1359
	return ~0;
1360
}
1361
 
1362
static unsigned si_tex_wrap(unsigned wrap)
1363
{
1364
	switch (wrap) {
1365
	default:
1366
	case PIPE_TEX_WRAP_REPEAT:
1367
		return V_008F30_SQ_TEX_WRAP;
1368
	case PIPE_TEX_WRAP_CLAMP:
1369
		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1370
	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1371
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1372
	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1373
		return V_008F30_SQ_TEX_CLAMP_BORDER;
1374
	case PIPE_TEX_WRAP_MIRROR_REPEAT:
1375
		return V_008F30_SQ_TEX_MIRROR;
1376
	case PIPE_TEX_WRAP_MIRROR_CLAMP:
1377
		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1378
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1379
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1380
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1381
		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1382
	}
1383
}
1384
 
1385
static unsigned si_tex_filter(unsigned filter)
1386
{
1387
	switch (filter) {
1388
	default:
1389
	case PIPE_TEX_FILTER_NEAREST:
1390
		return V_008F38_SQ_TEX_XY_FILTER_POINT;
1391
	case PIPE_TEX_FILTER_LINEAR:
1392
		return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
1393
	}
1394
}
1395
 
1396
static unsigned si_tex_mipfilter(unsigned filter)
1397
{
1398
	switch (filter) {
1399
	case PIPE_TEX_MIPFILTER_NEAREST:
1400
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
1401
	case PIPE_TEX_MIPFILTER_LINEAR:
1402
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1403
	default:
1404
	case PIPE_TEX_MIPFILTER_NONE:
1405
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
1406
	}
1407
}
1408
 
1409
static unsigned si_tex_compare(unsigned compare)
1410
{
1411
	switch (compare) {
1412
	default:
1413
	case PIPE_FUNC_NEVER:
1414
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1415
	case PIPE_FUNC_LESS:
1416
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1417
	case PIPE_FUNC_EQUAL:
1418
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1419
	case PIPE_FUNC_LEQUAL:
1420
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1421
	case PIPE_FUNC_GREATER:
1422
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1423
	case PIPE_FUNC_NOTEQUAL:
1424
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1425
	case PIPE_FUNC_GEQUAL:
1426
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1427
	case PIPE_FUNC_ALWAYS:
1428
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1429
	}
1430
}
1431
 
1432
static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
1433
{
1434
	switch (dim) {
1435
	default:
1436
	case PIPE_TEXTURE_1D:
1437
		return V_008F1C_SQ_RSRC_IMG_1D;
1438
	case PIPE_TEXTURE_1D_ARRAY:
1439
		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1440
	case PIPE_TEXTURE_2D:
1441
	case PIPE_TEXTURE_RECT:
1442
		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1443
					V_008F1C_SQ_RSRC_IMG_2D;
1444
	case PIPE_TEXTURE_2D_ARRAY:
1445
		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1446
					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1447
	case PIPE_TEXTURE_3D:
1448
		return V_008F1C_SQ_RSRC_IMG_3D;
1449
	case PIPE_TEXTURE_CUBE:
1450
	case PIPE_TEXTURE_CUBE_ARRAY:
1451
		return V_008F1C_SQ_RSRC_IMG_CUBE;
1452
	}
1453
}
1454
 
1455
/*
1456
 * Format support testing
1457
 */
1458
 
1459
/**
 * Report whether 'format' can be sampled as a texture, i.e. whether
 * si_translate_texformat() yields something other than ~0.
 */
static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);
	int first_non_void = util_format_get_first_non_void_channel(format);
	uint32_t hw_format = si_translate_texformat(screen, format, desc, first_non_void);

	return hw_format != ~0U;
}
1464
 
1465
static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1466
					       const struct util_format_description *desc,
1467
					       int first_non_void)
1468
{
1469
	unsigned type = desc->channel[first_non_void].type;
1470
	int i;
1471
 
1472
	if (type == UTIL_FORMAT_TYPE_FIXED)
1473
		return V_008F0C_BUF_DATA_FORMAT_INVALID;
1474
 
1475
	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1476
		return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1477
 
1478
	if (desc->nr_channels == 4 &&
1479
	    desc->channel[0].size == 10 &&
1480
	    desc->channel[1].size == 10 &&
1481
	    desc->channel[2].size == 10 &&
1482
	    desc->channel[3].size == 2)
1483
		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1484
 
1485
	/* See whether the components are of the same size. */
1486
	for (i = 0; i < desc->nr_channels; i++) {
1487
		if (desc->channel[first_non_void].size != desc->channel[i].size)
1488
			return V_008F0C_BUF_DATA_FORMAT_INVALID;
1489
	}
1490
 
1491
	switch (desc->channel[first_non_void].size) {
1492
	case 8:
1493
		switch (desc->nr_channels) {
1494
		case 1:
1495
			return V_008F0C_BUF_DATA_FORMAT_8;
1496
		case 2:
1497
			return V_008F0C_BUF_DATA_FORMAT_8_8;
1498
		case 3:
1499
		case 4:
1500
			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1501
		}
1502
		break;
1503
	case 16:
1504
		switch (desc->nr_channels) {
1505
		case 1:
1506
			return V_008F0C_BUF_DATA_FORMAT_16;
1507
		case 2:
1508
			return V_008F0C_BUF_DATA_FORMAT_16_16;
1509
		case 3:
1510
		case 4:
1511
			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1512
		}
1513
		break;
1514
	case 32:
1515
		/* From the Southern Islands ISA documentation about MTBUF:
1516
		 * 'Memory reads of data in memory that is 32 or 64 bits do not
1517
		 * undergo any format conversion.'
1518
		 */
1519
		if (type != UTIL_FORMAT_TYPE_FLOAT &&
1520
		    !desc->channel[first_non_void].pure_integer)
1521
			return V_008F0C_BUF_DATA_FORMAT_INVALID;
1522
 
1523
		switch (desc->nr_channels) {
1524
		case 1:
1525
			return V_008F0C_BUF_DATA_FORMAT_32;
1526
		case 2:
1527
			return V_008F0C_BUF_DATA_FORMAT_32_32;
1528
		case 3:
1529
			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1530
		case 4:
1531
			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1532
		}
1533
		break;
1534
	}
1535
 
1536
	return V_008F0C_BUF_DATA_FORMAT_INVALID;
1537
}
1538
 
1539
static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1540
					      const struct util_format_description *desc,
1541
					      int first_non_void)
1542
{
1543
	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1544
		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1545
 
1546
	switch (desc->channel[first_non_void].type) {
1547
	case UTIL_FORMAT_TYPE_SIGNED:
1548
		if (desc->channel[first_non_void].normalized)
1549
			return V_008F0C_BUF_NUM_FORMAT_SNORM;
1550
		else if (desc->channel[first_non_void].pure_integer)
1551
			return V_008F0C_BUF_NUM_FORMAT_SINT;
1552
		else
1553
			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1554
		break;
1555
	case UTIL_FORMAT_TYPE_UNSIGNED:
1556
		if (desc->channel[first_non_void].normalized)
1557
			return V_008F0C_BUF_NUM_FORMAT_UNORM;
1558
		else if (desc->channel[first_non_void].pure_integer)
1559
			return V_008F0C_BUF_NUM_FORMAT_UINT;
1560
		else
1561
			return V_008F0C_BUF_NUM_FORMAT_USCALED;
1562
		break;
1563
	case UTIL_FORMAT_TYPE_FLOAT:
1564
	default:
1565
		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1566
	}
1567
}
1568
 
1569
static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1570
{
1571
	const struct util_format_description *desc;
1572
	int first_non_void;
1573
	unsigned data_format;
1574
 
1575
	desc = util_format_description(format);
1576
	first_non_void = util_format_get_first_non_void_channel(format);
1577
	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1578
	return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1579
}
1580
 
1581
static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1582
{
1583
	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1584
		r600_translate_colorswap(format) != ~0U;
1585
}
1586
 
1587
static bool si_is_zs_format_supported(enum pipe_format format)
1588
{
1589
	return si_translate_dbformat(format) != V_028040_Z_INVALID;
1590
}
1591
 
1592
boolean si_is_format_supported(struct pipe_screen *screen,
1593
                               enum pipe_format format,
1594
                               enum pipe_texture_target target,
1595
                               unsigned sample_count,
1596
                               unsigned usage)
1597
{
1598
	struct si_screen *sscreen = (struct si_screen *)screen;
1599
	unsigned retval = 0;
1600
 
1601
	if (target >= PIPE_MAX_TEXTURE_TYPES) {
1602
		R600_ERR("r600: unsupported texture type %d\n", target);
1603
		return FALSE;
1604
	}
1605
 
1606
	if (!util_format_is_supported(format, usage))
1607
		return FALSE;
1608
 
1609
	if (sample_count > 1) {
1610
		/* 2D tiling on CIK is supported since DRM 2.35.0 */
1611
		if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
1612
			return FALSE;
1613
 
1614
		switch (sample_count) {
1615
		case 2:
1616
		case 4:
1617
		case 8:
1618
			break;
1619
		default:
1620
			return FALSE;
1621
		}
1622
	}
1623
 
1624
	if (usage & PIPE_BIND_SAMPLER_VIEW) {
1625
		if (target == PIPE_BUFFER) {
1626
			if (si_is_vertex_format_supported(screen, format))
1627
				retval |= PIPE_BIND_SAMPLER_VIEW;
1628
		} else {
1629
			if (si_is_sampler_format_supported(screen, format))
1630
				retval |= PIPE_BIND_SAMPLER_VIEW;
1631
		}
1632
	}
1633
 
1634
	if ((usage & (PIPE_BIND_RENDER_TARGET |
1635
		      PIPE_BIND_DISPLAY_TARGET |
1636
		      PIPE_BIND_SCANOUT |
1637
		      PIPE_BIND_SHARED |
1638
		      PIPE_BIND_BLENDABLE)) &&
1639
	    si_is_colorbuffer_format_supported(format)) {
1640
		retval |= usage &
1641
			  (PIPE_BIND_RENDER_TARGET |
1642
			   PIPE_BIND_DISPLAY_TARGET |
1643
			   PIPE_BIND_SCANOUT |
1644
			   PIPE_BIND_SHARED);
1645
		if (!util_format_is_pure_integer(format) &&
1646
		    !util_format_is_depth_or_stencil(format))
1647
			retval |= usage & PIPE_BIND_BLENDABLE;
1648
	}
1649
 
1650
	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1651
	    si_is_zs_format_supported(format)) {
1652
		retval |= PIPE_BIND_DEPTH_STENCIL;
1653
	}
1654
 
1655
	if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1656
	    si_is_vertex_format_supported(screen, format)) {
1657
		retval |= PIPE_BIND_VERTEX_BUFFER;
1658
	}
1659
 
1660
	if (usage & PIPE_BIND_TRANSFER_READ)
1661
		retval |= PIPE_BIND_TRANSFER_READ;
1662
	if (usage & PIPE_BIND_TRANSFER_WRITE)
1663
		retval |= PIPE_BIND_TRANSFER_WRITE;
1664
 
1665
	return retval == usage;
1666
}
1667
 
1668
unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
1669
{
1670
	unsigned tile_mode_index = 0;
1671
 
1672
	if (stencil) {
1673
		tile_mode_index = rtex->surface.stencil_tiling_index[level];
1674
	} else {
1675
		tile_mode_index = rtex->surface.tiling_index[level];
1676
	}
1677
	return tile_mode_index;
1678
}
1679
 
1680
/*
1681
 * framebuffer handling
1682
 */
1683
 
1684
static void si_initialize_color_surface(struct si_context *sctx,
1685
					struct r600_surface *surf)
1686
{
1687
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
1688
	unsigned level = surf->base.u.tex.level;
1689
	uint64_t offset = rtex->surface.level[level].offset;
1690
	unsigned pitch, slice;
1691
	unsigned color_info, color_attrib, color_pitch, color_view;
1692
	unsigned tile_mode_index;
1693
	unsigned format, swap, ntype, endian;
1694
	const struct util_format_description *desc;
1695
	int i;
1696
	unsigned blend_clamp = 0, blend_bypass = 0;
1697
	unsigned max_comp_size;
1698
 
1699
	/* Layered rendering doesn't work with LINEAR_GENERAL.
1700
	 * (LINEAR_ALIGNED and others work) */
1701
	if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) {
1702
		assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer);
1703
		offset += rtex->surface.level[level].slice_size *
1704
			  surf->base.u.tex.first_layer;
1705
		color_view = 0;
1706
	} else {
1707
		color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
1708
			     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
1709
	}
1710
 
1711
	pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
1712
	slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1713
	if (slice) {
1714
		slice = slice - 1;
1715
	}
1716
 
1717
	tile_mode_index = si_tile_mode_index(rtex, level, false);
1718
 
1719
	desc = util_format_description(surf->base.format);
1720
	for (i = 0; i < 4; i++) {
1721
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
1722
			break;
1723
		}
1724
	}
1725
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
1726
		ntype = V_028C70_NUMBER_FLOAT;
1727
	} else {
1728
		ntype = V_028C70_NUMBER_UNORM;
1729
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
1730
			ntype = V_028C70_NUMBER_SRGB;
1731
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1732
			if (desc->channel[i].pure_integer) {
1733
				ntype = V_028C70_NUMBER_SINT;
1734
			} else {
1735
				assert(desc->channel[i].normalized);
1736
				ntype = V_028C70_NUMBER_SNORM;
1737
			}
1738
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1739
			if (desc->channel[i].pure_integer) {
1740
				ntype = V_028C70_NUMBER_UINT;
1741
			} else {
1742
				assert(desc->channel[i].normalized);
1743
				ntype = V_028C70_NUMBER_UNORM;
1744
			}
1745
		}
1746
	}
1747
 
1748
	format = si_translate_colorformat(surf->base.format);
1749
	if (format == V_028C70_COLOR_INVALID) {
1750
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
1751
	}
1752
	assert(format != V_028C70_COLOR_INVALID);
1753
	swap = r600_translate_colorswap(surf->base.format);
1754
	if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
1755
		endian = V_028C70_ENDIAN_NONE;
1756
	} else {
1757
		endian = si_colorformat_endian_swap(format);
1758
	}
1759
 
1760
	/* blend clamp should be set for all NORM/SRGB types */
1761
	if (ntype == V_028C70_NUMBER_UNORM ||
1762
	    ntype == V_028C70_NUMBER_SNORM ||
1763
	    ntype == V_028C70_NUMBER_SRGB)
1764
		blend_clamp = 1;
1765
 
1766
	/* set blend bypass according to docs if SINT/UINT or
1767
	   8/24 COLOR variants */
1768
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1769
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1770
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
1771
		blend_clamp = 0;
1772
		blend_bypass = 1;
1773
	}
1774
 
1775
	color_info = S_028C70_FORMAT(format) |
1776
		S_028C70_COMP_SWAP(swap) |
1777
		S_028C70_BLEND_CLAMP(blend_clamp) |
1778
		S_028C70_BLEND_BYPASS(blend_bypass) |
1779
		S_028C70_NUMBER_TYPE(ntype) |
1780
		S_028C70_ENDIAN(endian);
1781
 
1782
	color_pitch = S_028C64_TILE_MAX(pitch);
1783
 
1784
	color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
1785
		S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
1786
 
1787
	if (rtex->resource.b.b.nr_samples > 1) {
1788
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
1789
 
1790
		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1791
				S_028C74_NUM_FRAGMENTS(log_samples);
1792
 
1793
		if (rtex->fmask.size) {
1794
			color_info |= S_028C70_COMPRESSION(1);
1795
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
1796
 
1797
			color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);
1798
 
1799
			if (sctx->b.chip_class == SI) {
1800
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
1801
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
1802
			}
1803
			if (sctx->b.chip_class >= CIK) {
1804
				color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1);
1805
			}
1806
		}
1807
	}
1808
 
1809
	offset += rtex->resource.gpu_address;
1810
 
1811
	surf->cb_color_base = offset >> 8;
1812
	surf->cb_color_pitch = color_pitch;
1813
	surf->cb_color_slice = S_028C68_TILE_MAX(slice);
1814
	surf->cb_color_view = color_view;
1815
	surf->cb_color_info = color_info;
1816
	surf->cb_color_attrib = color_attrib;
1817
 
1818
	if (rtex->fmask.size) {
1819
		surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
1820
		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
1821
	} else {
1822
		/* This must be set for fast clear to work without FMASK. */
1823
		surf->cb_color_fmask = surf->cb_color_base;
1824
		surf->cb_color_fmask_slice = surf->cb_color_slice;
1825
		surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1826
 
1827
		if (sctx->b.chip_class == SI) {
1828
			unsigned bankh = util_logbase2(rtex->surface.bankh);
1829
			surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
1830
		}
1831
 
1832
		if (sctx->b.chip_class >= CIK) {
1833
			surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch);
1834
		}
1835
	}
1836
 
1837
	/* Determine pixel shader export format */
1838
	max_comp_size = si_colorformat_max_comp_size(format);
1839
	if (ntype == V_028C70_NUMBER_SRGB ||
1840
	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
1841
	     max_comp_size <= 10) ||
1842
	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
1843
		surf->export_16bpc = true;
1844
	}
1845
 
1846
	surf->color_initialized = true;
1847
}
1848
 
1849
static void si_init_depth_surface(struct si_context *sctx,
1850
				  struct r600_surface *surf)
1851
{
1852
	struct si_screen *sscreen = sctx->screen;
1853
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
1854
	unsigned level = surf->base.u.tex.level;
1855
	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
1856
	unsigned format, tile_mode_index, array_mode;
1857
	unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
1858
	uint32_t z_info, s_info, db_depth_info;
1859
	uint64_t z_offs, s_offs;
1860
	uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0;
1861
 
1862
	switch (sctx->framebuffer.state.zsbuf->texture->format) {
1863
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1864
	case PIPE_FORMAT_X8Z24_UNORM:
1865
	case PIPE_FORMAT_Z24X8_UNORM:
1866
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1867
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1868
		break;
1869
	case PIPE_FORMAT_Z32_FLOAT:
1870
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1871
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1872
						S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1873
		break;
1874
	case PIPE_FORMAT_Z16_UNORM:
1875
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1876
		break;
1877
	default:
1878
		assert(0);
1879
	}
1880
 
1881
	format = si_translate_dbformat(rtex->resource.b.b.format);
1882
 
1883
	if (format == V_028040_Z_INVALID) {
1884
		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
1885
	}
1886
	assert(format != V_028040_Z_INVALID);
1887
 
1888
	s_offs = z_offs = rtex->resource.gpu_address;
1889
	z_offs += rtex->surface.level[level].offset;
1890
	s_offs += rtex->surface.stencil_level[level].offset;
1891
 
1892
	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1893
 
1894
	z_info = S_028040_FORMAT(format);
1895
	if (rtex->resource.b.b.nr_samples > 1) {
1896
		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
1897
	}
1898
 
1899
	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
1900
		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
1901
	else
1902
		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
1903
 
1904
	if (sctx->b.chip_class >= CIK) {
1905
		switch (rtex->surface.level[level].mode) {
1906
		case RADEON_SURF_MODE_2D:
1907
			array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
1908
			break;
1909
		case RADEON_SURF_MODE_1D:
1910
		case RADEON_SURF_MODE_LINEAR_ALIGNED:
1911
		case RADEON_SURF_MODE_LINEAR:
1912
		default:
1913
			array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
1914
			break;
1915
		}
1916
		tile_split = rtex->surface.tile_split;
1917
		stile_split = rtex->surface.stencil_tile_split;
1918
		macro_aspect = rtex->surface.mtilea;
1919
		bankw = rtex->surface.bankw;
1920
		bankh = rtex->surface.bankh;
1921
		tile_split = cik_tile_split(tile_split);
1922
		stile_split = cik_tile_split(stile_split);
1923
		macro_aspect = cik_macro_tile_aspect(macro_aspect);
1924
		bankw = cik_bank_wh(bankw);
1925
		bankh = cik_bank_wh(bankh);
1926
		nbanks = si_num_banks(sscreen, rtex);
1927
		tile_mode_index = si_tile_mode_index(rtex, level, false);
1928
		pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
1929
 
1930
		db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
1931
			S_02803C_PIPE_CONFIG(pipe_config) |
1932
			S_02803C_BANK_WIDTH(bankw) |
1933
			S_02803C_BANK_HEIGHT(bankh) |
1934
			S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
1935
			S_02803C_NUM_BANKS(nbanks);
1936
		z_info |= S_028040_TILE_SPLIT(tile_split);
1937
		s_info |= S_028044_TILE_SPLIT(stile_split);
1938
	} else {
1939
		tile_mode_index = si_tile_mode_index(rtex, level, false);
1940
		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1941
		tile_mode_index = si_tile_mode_index(rtex, level, true);
1942
		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
1943
	}
1944
 
1945
	/* HiZ aka depth buffer htile */
1946
	/* use htile only for first level */
1947
	if (rtex->htile_buffer && !level) {
1948
		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
1949
			  S_028040_ALLOW_EXPCLEAR(1);
1950
 
1951
		/* Use all of the htile_buffer for depth, because we don't
1952
		 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
1953
		s_info |= S_028044_TILE_STENCIL_DISABLE(1);
1954
 
1955
		uint64_t va = rtex->htile_buffer->gpu_address;
1956
		db_htile_data_base = va >> 8;
1957
		db_htile_surface = S_028ABC_FULL_CACHE(1);
1958
	} else {
1959
		db_htile_data_base = 0;
1960
		db_htile_surface = 0;
1961
	}
1962
 
1963
	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
1964
 
1965
	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
1966
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
1967
	surf->db_htile_data_base = db_htile_data_base;
1968
	surf->db_depth_info = db_depth_info;
1969
	surf->db_z_info = z_info;
1970
	surf->db_stencil_info = s_info;
1971
	surf->db_depth_base = z_offs >> 8;
1972
	surf->db_stencil_base = s_offs >> 8;
1973
	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
1974
			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
1975
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
1976
							levelinfo->nblk_y) / 64 - 1);
1977
	surf->db_htile_surface = db_htile_surface;
1978
	surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;
1979
 
1980
	surf->depth_initialized = true;
1981
}
1982
 
1983
static void si_set_framebuffer_state(struct pipe_context *ctx,
1984
				     const struct pipe_framebuffer_state *state)
1985
{
1986
	struct si_context *sctx = (struct si_context *)ctx;
1987
	struct pipe_constant_buffer constbuf = {0};
1988
	struct r600_surface *surf = NULL;
1989
	struct r600_texture *rtex;
1990
	bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
1991
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
1992
	int i;
1993
 
1994
	/* Only flush TC when changing the framebuffer state, because
1995
	 * the only client not using TC that can change textures is
1996
	 * the framebuffer.
1997
	 *
1998
	 * Flush all CB and DB caches here because all buffers can be used
1999
	 * for write by both TC (with shader image stores) and CB/DB.
2000
	 */
2001
	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
2002
			 SI_CONTEXT_INV_TC_L2 |
2003
			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
2004
 
2005
	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
2006
 
2007
	sctx->framebuffer.export_16bpc = 0;
2008
	sctx->framebuffer.compressed_cb_mask = 0;
2009
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
2010
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
2011
	sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
2012
				  util_format_is_pure_integer(state->cbufs[0]->format);
2013
 
2014
	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
2015
		sctx->db_render_state.dirty = true;
2016
 
2017
	for (i = 0; i < state->nr_cbufs; i++) {
2018
		if (!state->cbufs[i])
2019
			continue;
2020
 
2021
		surf = (struct r600_surface*)state->cbufs[i];
2022
		rtex = (struct r600_texture*)surf->base.texture;
2023
 
2024
		if (!surf->color_initialized) {
2025
			si_initialize_color_surface(sctx, surf);
2026
		}
2027
 
2028
		if (surf->export_16bpc) {
2029
			sctx->framebuffer.export_16bpc |= 1 << i;
2030
		}
2031
 
2032
		if (rtex->fmask.size && rtex->cmask.size) {
2033
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
2034
		}
2035
	}
2036
	/* Set the 16BPC export for possible dual-src blending. */
2037
	if (i == 1 && surf && surf->export_16bpc) {
2038
		sctx->framebuffer.export_16bpc |= 1 << 1;
2039
	}
2040
 
2041
	assert(!(sctx->framebuffer.export_16bpc & ~0xff));
2042
 
2043
	if (state->zsbuf) {
2044
		surf = (struct r600_surface*)state->zsbuf;
2045
 
2046
		if (!surf->depth_initialized) {
2047
			si_init_depth_surface(sctx, surf);
2048
		}
2049
	}
2050
 
2051
	si_update_fb_rs_state(sctx);
2052
	si_update_fb_blend_state(sctx);
2053
 
2054
	sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
2055
	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
2056
	sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
2057
	sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
2058
	sctx->framebuffer.atom.dirty = true;
2059
 
2060
	if (sctx->framebuffer.nr_samples != old_nr_samples) {
2061
		sctx->msaa_config.dirty = true;
2062
		sctx->db_render_state.dirty = true;
2063
 
2064
		/* Set sample locations as fragment shader constants. */
2065
		switch (sctx->framebuffer.nr_samples) {
2066
		case 1:
2067
			constbuf.user_buffer = sctx->b.sample_locations_1x;
2068
			break;
2069
		case 2:
2070
			constbuf.user_buffer = sctx->b.sample_locations_2x;
2071
			break;
2072
		case 4:
2073
			constbuf.user_buffer = sctx->b.sample_locations_4x;
2074
			break;
2075
		case 8:
2076
			constbuf.user_buffer = sctx->b.sample_locations_8x;
2077
			break;
2078
		case 16:
2079
			constbuf.user_buffer = sctx->b.sample_locations_16x;
2080
			break;
2081
		default:
2082
			assert(0);
2083
		}
2084
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
2085
		ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
2086
					 SI_DRIVER_STATE_CONST_BUF, &constbuf);
2087
 
2088
		/* Smoothing (only possible with nr_samples == 1) uses the same
2089
		 * sample locations as the MSAA it simulates.
2090
		 *
2091
		 * Therefore, don't update the sample locations when
2092
		 * transitioning from no AA to smoothing-equivalent AA, and
2093
		 * vice versa.
2094
		 */
2095
		if ((sctx->framebuffer.nr_samples != 1 ||
2096
		     old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
2097
		    (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
2098
		     old_nr_samples != 1))
2099
			sctx->msaa_sample_locs.dirty = true;
2100
	}
2101
}
2102
 
2103
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
2104
{
2105
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2106
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
2107
	unsigned i, nr_cbufs = state->nr_cbufs;
2108
	struct r600_texture *tex = NULL;
2109
	struct r600_surface *cb = NULL;
2110
 
2111
	/* Colorbuffers. */
2112
	for (i = 0; i < nr_cbufs; i++) {
2113
		cb = (struct r600_surface*)state->cbufs[i];
2114
		if (!cb) {
2115
			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2116
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2117
			continue;
2118
		}
2119
 
2120
		tex = (struct r600_texture *)cb->base.texture;
2121
		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2122
				      &tex->resource, RADEON_USAGE_READWRITE,
2123
				      tex->surface.nsamples > 1 ?
2124
					      RADEON_PRIO_COLOR_BUFFER_MSAA :
2125
					      RADEON_PRIO_COLOR_BUFFER);
2126
 
2127
		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
2128
			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2129
				tex->cmask_buffer, RADEON_USAGE_READWRITE,
2130
				RADEON_PRIO_COLOR_META);
2131
		}
2132
 
2133
		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
2134
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
2135
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
2136
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
2137
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
2138
		radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
2139
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
2140
		radeon_emit(cs, 0);			/* R_028C78 unused */
2141
		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
2142
		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
2143
		radeon_emit(cs, cb->cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
2144
		radeon_emit(cs, cb->cb_color_fmask_slice);	/* R_028C88_CB_COLOR0_FMASK_SLICE */
2145
		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
2146
		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */
2147
	}
2148
	/* set CB_COLOR1_INFO for possible dual-src blending */
2149
	if (i == 1 && state->cbufs[0]) {
2150
		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
2151
				       cb->cb_color_info | tex->cb_color_info);
2152
		i++;
2153
	}
2154
	for (; i < 8 ; i++) {
2155
		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
2156
	}
2157
 
2158
	/* ZS buffer. */
2159
	if (state->zsbuf) {
2160
		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
2161
		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
2162
 
2163
		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2164
				      &rtex->resource, RADEON_USAGE_READWRITE,
2165
				      zb->base.texture->nr_samples > 1 ?
2166
					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
2167
					      RADEON_PRIO_DEPTH_BUFFER);
2168
 
2169
		if (zb->db_htile_data_base) {
2170
			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2171
					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
2172
					      RADEON_PRIO_DEPTH_META);
2173
		}
2174
 
2175
		r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
2176
		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
2177
 
2178
		r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
2179
		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
2180
		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
2181
			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
2182
		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
2183
		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
2184
		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
2185
		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
2186
		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
2187
		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
2188
		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */
2189
 
2190
		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
2191
		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
2192
		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
2193
				       zb->pa_su_poly_offset_db_fmt_cntl);
2194
	} else {
2195
		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
2196
		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
2197
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
2198
	}
2199
 
2200
	/* Framebuffer dimensions. */
2201
        /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
2202
	r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
2203
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
2204
}
2205
 
2206
static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
2207
				     struct r600_atom *atom)
2208
{
2209
	struct si_context *sctx = (struct si_context *)rctx;
2210
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2211
	unsigned nr_samples = sctx->framebuffer.nr_samples;
2212
 
2213
	cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
2214
						SI_NUM_SMOOTH_AA_SAMPLES);
2215
}
2216
 
2217
const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */
2218
 
2219
static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
2220
{
2221
	struct si_context *sctx = (struct si_context *)rctx;
2222
	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2223
 
2224
	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
2225
				sctx->ps_iter_samples,
2226
				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
2227
}
2228
 
2229
const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
2230
 
2231
static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2232
{
2233
	struct si_context *sctx = (struct si_context *)ctx;
2234
 
2235
	if (sctx->ps_iter_samples == min_samples)
2236
		return;
2237
 
2238
	sctx->ps_iter_samples = min_samples;
2239
 
2240
	if (sctx->framebuffer.nr_samples > 1)
2241
		sctx->msaa_config.dirty = true;
2242
}
2243
 
2244
/*
2245
 * Samplers
2246
 */
2247
 
2248
static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
2249
							struct pipe_resource *texture,
2250
							const struct pipe_sampler_view *state)
2251
{
2252
	struct si_context *sctx = (struct si_context*)ctx;
2253
	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
2254
	struct r600_texture *tmp = (struct r600_texture*)texture;
2255
	const struct util_format_description *desc;
2256
	unsigned format, num_format;
2257
	uint32_t pitch = 0;
2258
	unsigned char state_swizzle[4], swizzle[4];
2259
	unsigned height, depth, width;
2260
	enum pipe_format pipe_format = state->format;
2261
	struct radeon_surf_level *surflevel;
2262
	int first_non_void;
2263
	uint64_t va;
2264
 
2265
	if (view == NULL)
2266
		return NULL;
2267
 
2268
	/* initialize base object */
2269
	view->base = *state;
2270
	view->base.texture = NULL;
2271
	view->base.reference.count = 1;
2272
	view->base.context = ctx;
2273
 
2274
	/* NULL resource, obey swizzle (only ZERO and ONE make sense). */
2275
	if (!texture) {
2276
		view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) |
2277
				 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) |
2278
				 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) |
2279
				 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) |
2280
				 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D);
2281
		return &view->base;
2282
	}
2283
 
2284
	pipe_resource_reference(&view->base.texture, texture);
2285
	view->resource = &tmp->resource;
2286
 
2287
	/* Buffer resource. */
2288
	if (texture->target == PIPE_BUFFER) {
2289
		unsigned stride;
2290
 
2291
		desc = util_format_description(state->format);
2292
		first_non_void = util_format_get_first_non_void_channel(state->format);
2293
		stride = desc->block.bits / 8;
2294
		va = tmp->resource.gpu_address + state->u.buf.first_element*stride;
2295
		format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
2296
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
2297
 
2298
		view->state[4] = va;
2299
		view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
2300
				 S_008F04_STRIDE(stride);
2301
		view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
2302
		view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2303
				 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2304
				 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2305
				 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2306
				 S_008F0C_NUM_FORMAT(num_format) |
2307
				 S_008F0C_DATA_FORMAT(format);
2308
 
2309
		LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
2310
		return &view->base;
2311
	}
2312
 
2313
	state_swizzle[0] = state->swizzle_r;
2314
	state_swizzle[1] = state->swizzle_g;
2315
	state_swizzle[2] = state->swizzle_b;
2316
	state_swizzle[3] = state->swizzle_a;
2317
 
2318
	surflevel = tmp->surface.level;
2319
 
2320
	/* Texturing with separate depth and stencil. */
2321
	if (tmp->is_depth && !tmp->is_flushing_texture) {
2322
		switch (pipe_format) {
2323
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2324
			pipe_format = PIPE_FORMAT_Z32_FLOAT;
2325
			break;
2326
		case PIPE_FORMAT_X8Z24_UNORM:
2327
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2328
			/* Z24 is always stored like this. */
2329
			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
2330
			break;
2331
		case PIPE_FORMAT_X24S8_UINT:
2332
		case PIPE_FORMAT_S8X24_UINT:
2333
		case PIPE_FORMAT_X32_S8X24_UINT:
2334
			pipe_format = PIPE_FORMAT_S8_UINT;
2335
			surflevel = tmp->surface.stencil_level;
2336
			break;
2337
		default:;
2338
		}
2339
	}
2340
 
2341
	desc = util_format_description(pipe_format);
2342
 
2343
	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2344
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2345
		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2346
 
2347
		switch (pipe_format) {
2348
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2349
		case PIPE_FORMAT_X24S8_UINT:
2350
		case PIPE_FORMAT_X32_S8X24_UINT:
2351
		case PIPE_FORMAT_X8Z24_UNORM:
2352
			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2353
			break;
2354
		default:
2355
			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2356
		}
2357
	} else {
2358
		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2359
	}
2360
 
2361
	first_non_void = util_format_get_first_non_void_channel(pipe_format);
2362
 
2363
	switch (pipe_format) {
2364
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2365
		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2366
		break;
2367
	default:
2368
		if (first_non_void < 0) {
2369
			if (util_format_is_compressed(pipe_format)) {
2370
				switch (pipe_format) {
2371
				case PIPE_FORMAT_DXT1_SRGB:
2372
				case PIPE_FORMAT_DXT1_SRGBA:
2373
				case PIPE_FORMAT_DXT3_SRGBA:
2374
				case PIPE_FORMAT_DXT5_SRGBA:
2375
				case PIPE_FORMAT_BPTC_SRGBA:
2376
					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2377
					break;
2378
				case PIPE_FORMAT_RGTC1_SNORM:
2379
				case PIPE_FORMAT_LATC1_SNORM:
2380
				case PIPE_FORMAT_RGTC2_SNORM:
2381
				case PIPE_FORMAT_LATC2_SNORM:
2382
				/* implies float, so use SNORM/UNORM to determine
2383
				   whether data is signed or not */
2384
				case PIPE_FORMAT_BPTC_RGB_FLOAT:
2385
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2386
					break;
2387
				default:
2388
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2389
					break;
2390
				}
2391
			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2392
				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2393
			} else {
2394
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2395
			}
2396
		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2397
			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2398
		} else {
2399
			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2400
 
2401
			switch (desc->channel[first_non_void].type) {
2402
			case UTIL_FORMAT_TYPE_FLOAT:
2403
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2404
				break;
2405
			case UTIL_FORMAT_TYPE_SIGNED:
2406
				if (desc->channel[first_non_void].normalized)
2407
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2408
				else if (desc->channel[first_non_void].pure_integer)
2409
					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2410
				else
2411
					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2412
				break;
2413
			case UTIL_FORMAT_TYPE_UNSIGNED:
2414
				if (desc->channel[first_non_void].normalized)
2415
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2416
				else if (desc->channel[first_non_void].pure_integer)
2417
					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2418
				else
2419
					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2420
			}
2421
		}
2422
	}
2423
 
2424
	format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
2425
	if (format == ~0) {
2426
		format = 0;
2427
	}
2428
 
2429
	/* not supported any more */
2430
	//endian = si_colorformat_endian_swap(format);
2431
 
2432
	width = surflevel[0].npix_x;
2433
	height = surflevel[0].npix_y;
2434
	depth = surflevel[0].npix_z;
2435
	pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
2436
 
2437
	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
2438
	        height = 1;
2439
		depth = texture->array_size;
2440
	} else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
2441
		depth = texture->array_size;
2442
	} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
2443
		depth = texture->array_size / 6;
2444
 
2445
	va = tmp->resource.gpu_address + surflevel[0].offset;
2446
	va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size;
2447
 
2448
	view->state[0] = va >> 8;
2449
	view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
2450
			  S_008F14_DATA_FORMAT(format) |
2451
			  S_008F14_NUM_FORMAT(num_format));
2452
	view->state[2] = (S_008F18_WIDTH(width - 1) |
2453
			  S_008F18_HEIGHT(height - 1));
2454
	view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2455
			  S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2456
			  S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2457
			  S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2458
			  S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
2459
 
2460
			  S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
2461
						      util_logbase2(texture->nr_samples) :
2462
						      state->u.tex.last_level - tmp->mipmap_shift) |
2463
			  S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
2464
			  S_008F1C_POW2_PAD(texture->last_level > 0) |
2465
			  S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
2466
	view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
2467
	view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2468
			  S_008F24_LAST_ARRAY(state->u.tex.last_layer));
2469
	view->state[6] = 0;
2470
	view->state[7] = 0;
2471
 
2472
	/* Initialize the sampler view for FMASK. */
2473
	if (tmp->fmask.size) {
2474
		uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset;
2475
		uint32_t fmask_format;
2476
 
2477
		switch (texture->nr_samples) {
2478
		case 2:
2479
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2480
			break;
2481
		case 4:
2482
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2483
			break;
2484
		case 8:
2485
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2486
			break;
2487
		default:
2488
			assert(0);
2489
			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2490
		}
2491
 
2492
		view->fmask_state[0] = va >> 8;
2493
		view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2494
				       S_008F14_DATA_FORMAT(fmask_format) |
2495
				       S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2496
		view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
2497
				       S_008F18_HEIGHT(height - 1);
2498
		view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2499
				       S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2500
				       S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2501
				       S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2502
				       S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
2503
				       S_008F1C_TYPE(si_tex_dim(texture->target, 0));
2504
		view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2505
				       S_008F20_PITCH(tmp->fmask.pitch - 1);
2506
		view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2507
				       S_008F24_LAST_ARRAY(state->u.tex.last_layer);
2508
		view->fmask_state[6] = 0;
2509
		view->fmask_state[7] = 0;
2510
	}
2511
 
2512
	return &view->base;
2513
}
2514
 
2515
static void si_sampler_view_destroy(struct pipe_context *ctx,
2516
				    struct pipe_sampler_view *state)
2517
{
2518
	struct si_sampler_view *view = (struct si_sampler_view *)state;
2519
 
2520
	if (view->resource && view->resource->b.b.target == PIPE_BUFFER)
2521
		LIST_DELINIT(&view->list);
2522
 
2523
	pipe_resource_reference(&state->texture, NULL);
2524
	FREE(view);
2525
}
2526
 
2527
static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
2528
{
2529
	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
2530
	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
2531
	       (linear_filter &&
2532
	        (wrap == PIPE_TEX_WRAP_CLAMP ||
2533
		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
2534
}
2535
 
2536
static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
2537
{
2538
	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2539
			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
2540
 
2541
	return (state->border_color.ui[0] || state->border_color.ui[1] ||
2542
		state->border_color.ui[2] || state->border_color.ui[3]) &&
2543
	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
2544
		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
2545
		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
2546
}
2547
 
2548
static void *si_create_sampler_state(struct pipe_context *ctx,
2549
				     const struct pipe_sampler_state *state)
2550
{
2551
	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
2552
	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
2553
	unsigned border_color_type;
2554
 
2555
	if (rstate == NULL) {
2556
		return NULL;
2557
	}
2558
 
2559
	if (sampler_state_needs_border_color(state))
2560
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
2561
	else
2562
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2563
 
2564
	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
2565
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
2566
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
2567
			  r600_tex_aniso_filter(state->max_anisotropy) << 9 |
2568
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
2569
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
2570
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
2571
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
2572
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
2573
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
2574
			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
2575
			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
2576
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
2577
	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
2578
 
2579
	if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2580
		memcpy(rstate->border_color, state->border_color.ui,
2581
		       sizeof(rstate->border_color));
2582
	}
2583
 
2584
	return rstate;
2585
}
2586
 
2587
/* Upload border colors and update the pointers in resource descriptors.
2588
 * There can only be 4096 border colors per context.
2589
 *
2590
 * XXX: This is broken if the buffer gets reallocated.
2591
 */
2592
static void si_set_border_colors(struct si_context *sctx, unsigned count,
2593
				 void **states)
2594
{
2595
	struct si_sampler_state **rstates = (struct si_sampler_state **)states;
2596
	uint32_t *border_color_table = NULL;
2597
	int i, j;
2598
 
2599
	for (i = 0; i < count; i++) {
2600
		if (rstates[i] &&
2601
		    G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
2602
		    V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2603
			if (!sctx->border_color_table ||
2604
			    ((sctx->border_color_offset + count - i) &
2605
			     C_008F3C_BORDER_COLOR_PTR)) {
2606
				r600_resource_reference(&sctx->border_color_table, NULL);
2607
				sctx->border_color_offset = 0;
2608
 
2609
				sctx->border_color_table =
2610
					si_resource_create_custom(&sctx->screen->b.b,
2611
								  PIPE_USAGE_DYNAMIC,
2612
								  4096 * 4 * 4);
2613
			}
2614
 
2615
			if (!border_color_table) {
2616
			        border_color_table =
2617
					sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
2618
							     sctx->b.rings.gfx.cs,
2619
							     PIPE_TRANSFER_WRITE |
2620
							     PIPE_TRANSFER_UNSYNCHRONIZED);
2621
			}
2622
 
2623
			for (j = 0; j < 4; j++) {
2624
				border_color_table[4 * sctx->border_color_offset + j] =
2625
					util_le32_to_cpu(rstates[i]->border_color[j]);
2626
			}
2627
 
2628
			rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
2629
			rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
2630
		}
2631
	}
2632
 
2633
	if (border_color_table) {
2634
		struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
2635
 
2636
		uint64_t va_offset = sctx->border_color_table->gpu_address;
2637
 
2638
		si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
2639
		if (sctx->b.chip_class >= CIK)
2640
			si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
2641
		si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
2642
			      RADEON_PRIO_SHADER_DATA);
2643
		si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
2644
	}
2645
}
2646
 
2647
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
2648
                                   unsigned start, unsigned count,
2649
                                   void **states)
2650
{
2651
	struct si_context *sctx = (struct si_context *)ctx;
2652
 
2653
	if (!count || shader >= SI_NUM_SHADERS)
2654
		return;
2655
 
2656
	si_set_border_colors(sctx, count, states);
2657
	si_set_sampler_descriptors(sctx, shader, start, count, states);
2658
}
2659
 
2660
static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2661
{
2662
	struct si_context *sctx = (struct si_context *)ctx;
2663
	struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
2664
	struct si_pm4_state *pm4 = &state->pm4;
2665
	uint16_t mask = sample_mask;
2666
 
2667
        if (state == NULL)
2668
                return;
2669
 
2670
	state->sample_mask = mask;
2671
	si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
2672
	si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
2673
 
2674
	si_pm4_set_state(sctx, sample_mask, state);
2675
}
2676
 
2677
/**
 * pipe_context::delete_sampler_state hook: releases the memory of a
 * sampler state object. The context argument is not used.
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	(void)ctx;

	free(state);
}
2681
 
2682
/*
 * Vertex elements & buffers
 */
2685
 
2686
static void *si_create_vertex_elements(struct pipe_context *ctx,
2687
				       unsigned count,
2688
				       const struct pipe_vertex_element *elements)
2689
{
2690
	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
2691
	int i;
2692
 
2693
	assert(count < PIPE_MAX_ATTRIBS);
2694
	if (!v)
2695
		return NULL;
2696
 
2697
	v->count = count;
2698
	for (i = 0; i < count; ++i) {
2699
		const struct util_format_description *desc;
2700
		unsigned data_format, num_format;
2701
		int first_non_void;
2702
 
2703
		desc = util_format_description(elements[i].src_format);
2704
		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
2705
		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
2706
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
2707
 
2708
		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2709
				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2710
				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2711
				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2712
				   S_008F0C_NUM_FORMAT(num_format) |
2713
				   S_008F0C_DATA_FORMAT(data_format);
2714
		v->format_size[i] = desc->block.bits / 8;
2715
	}
2716
	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
2717
 
2718
	return v;
2719
}
2720
 
2721
static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
2722
{
2723
	struct si_context *sctx = (struct si_context *)ctx;
2724
	struct si_vertex_element *v = (struct si_vertex_element*)state;
2725
 
2726
	sctx->vertex_elements = v;
2727
	sctx->vertex_buffers_dirty = true;
2728
}
2729
 
2730
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
2731
{
2732
	struct si_context *sctx = (struct si_context *)ctx;
2733
 
2734
	if (sctx->vertex_elements == state)
2735
		sctx->vertex_elements = NULL;
2736
	FREE(state);
2737
}
2738
 
2739
static void si_set_vertex_buffers(struct pipe_context *ctx,
2740
				  unsigned start_slot, unsigned count,
2741
				  const struct pipe_vertex_buffer *buffers)
2742
{
2743
	struct si_context *sctx = (struct si_context *)ctx;
2744
	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
2745
	int i;
2746
 
2747
	assert(start_slot + count <= Elements(sctx->vertex_buffer));
2748
 
2749
	if (buffers) {
2750
		for (i = 0; i < count; i++) {
2751
			const struct pipe_vertex_buffer *src = buffers + i;
2752
			struct pipe_vertex_buffer *dsti = dst + i;
2753
 
2754
			pipe_resource_reference(&dsti->buffer, src->buffer);
2755
			dsti->buffer_offset = src->buffer_offset;
2756
			dsti->stride = src->stride;
2757
		}
2758
	} else {
2759
		for (i = 0; i < count; i++) {
2760
			pipe_resource_reference(&dst[i].buffer, NULL);
2761
		}
2762
	}
2763
	sctx->vertex_buffers_dirty = true;
2764
}
2765
 
2766
static void si_set_index_buffer(struct pipe_context *ctx,
2767
				const struct pipe_index_buffer *ib)
2768
{
2769
	struct si_context *sctx = (struct si_context *)ctx;
2770
 
2771
	if (ib) {
2772
		pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
2773
	        memcpy(&sctx->index_buffer, ib, sizeof(*ib));
2774
	} else {
2775
		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
2776
	}
2777
}
2778
 
2779
/*
 * Misc
 */
2782
static void si_set_polygon_stipple(struct pipe_context *ctx,
2783
				   const struct pipe_poly_stipple *state)
2784
{
2785
	struct si_context *sctx = (struct si_context *)ctx;
2786
	struct pipe_resource *tex;
2787
	struct pipe_sampler_view *view;
2788
	bool is_zero = true;
2789
	bool is_one = true;
2790
	int i;
2791
 
2792
	/* The hardware obeys 0 and 1 swizzles in the descriptor even if
2793
	 * the resource is NULL/invalid. Take advantage of this fact and skip
2794
	 * texture allocation if the stipple pattern is constant.
2795
	 *
2796
	 * This is an optimization for the common case when stippling isn't
2797
	 * used but set_polygon_stipple is still called by st/mesa.
2798
	 */
2799
	for (i = 0; i < Elements(state->stipple); i++) {
2800
		is_zero = is_zero && state->stipple[i] == 0;
2801
		is_one = is_one && state->stipple[i] == 0xffffffff;
2802
	}
2803
 
2804
	if (is_zero || is_one) {
2805
		struct pipe_sampler_view templ = {{0}};
2806
 
2807
		templ.swizzle_r = PIPE_SWIZZLE_ZERO;
2808
		templ.swizzle_g = PIPE_SWIZZLE_ZERO;
2809
		templ.swizzle_b = PIPE_SWIZZLE_ZERO;
2810
		/* The pattern should be inverted in the texture. */
2811
		templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO;
2812
 
2813
		view = ctx->create_sampler_view(ctx, NULL, &templ);
2814
	} else {
2815
		/* Create a new texture. */
2816
		tex = util_pstipple_create_stipple_texture(ctx, state->stipple);
2817
		if (!tex)
2818
			return;
2819
 
2820
		view = util_pstipple_create_sampler_view(ctx, tex);
2821
		pipe_resource_reference(&tex, NULL);
2822
	}
2823
 
2824
	ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT,
2825
			       SI_POLY_STIPPLE_SAMPLER, 1, &view);
2826
	pipe_sampler_view_reference(&view, NULL);
2827
 
2828
	/* Bind the sampler state if needed. */
2829
	if (!sctx->pstipple_sampler_state) {
2830
		sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx);
2831
		ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT,
2832
					 SI_POLY_STIPPLE_SAMPLER, 1,
2833
					 &sctx->pstipple_sampler_state);
2834
	}
2835
}
2836
 
2837
static void si_texture_barrier(struct pipe_context *ctx)
2838
{
2839
	struct si_context *sctx = (struct si_context *)ctx;
2840
 
2841
	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
2842
			 SI_CONTEXT_INV_TC_L2 |
2843
			 SI_CONTEXT_FLUSH_AND_INV_CB;
2844
}
2845
 
2846
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
2847
{
2848
	struct pipe_blend_state blend;
2849
 
2850
	memset(&blend, 0, sizeof(blend));
2851
	blend.independent_blend_enable = true;
2852
	blend.rt[0].colormask = 0xf;
2853
	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
2854
}
2855
 
2856
/**
 * Adapter with a pipe_context-based signature that forwards to
 * si_need_cs_space() with the context cast to si_context.
 */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_need_cs_space(sctx, num_dw, include_draw_vbo);
}
2861
 
2862
void si_init_state_functions(struct si_context *sctx)
2863
{
2864
	si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
2865
	si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
2866
	si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
2867
 
2868
	sctx->b.b.create_blend_state = si_create_blend_state;
2869
	sctx->b.b.bind_blend_state = si_bind_blend_state;
2870
	sctx->b.b.delete_blend_state = si_delete_blend_state;
2871
	sctx->b.b.set_blend_color = si_set_blend_color;
2872
 
2873
	sctx->b.b.create_rasterizer_state = si_create_rs_state;
2874
	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
2875
	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
2876
 
2877
	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
2878
	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
2879
	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
2880
 
2881
	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
2882
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
2883
	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
2884
	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
2885
 
2886
	sctx->b.b.set_clip_state = si_set_clip_state;
2887
	sctx->b.b.set_scissor_states = si_set_scissor_states;
2888
	sctx->b.b.set_viewport_states = si_set_viewport_states;
2889
	sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;
2890
 
2891
	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
2892
	sctx->b.b.get_sample_position = cayman_get_sample_position;
2893
 
2894
	sctx->b.b.create_sampler_state = si_create_sampler_state;
2895
	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
2896
	sctx->b.b.delete_sampler_state = si_delete_sampler_state;
2897
 
2898
	sctx->b.b.create_sampler_view = si_create_sampler_view;
2899
	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
2900
 
2901
	sctx->b.b.set_sample_mask = si_set_sample_mask;
2902
 
2903
	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
2904
	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
2905
	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
2906
	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
2907
	sctx->b.b.set_index_buffer = si_set_index_buffer;
2908
 
2909
	sctx->b.b.texture_barrier = si_texture_barrier;
2910
	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
2911
	sctx->b.b.set_min_samples = si_set_min_samples;
2912
 
2913
	sctx->b.dma_copy = si_dma_copy;
2914
	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
2915
	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
2916
 
2917
	sctx->b.b.draw_vbo = si_draw_vbo;
2918
}
2919
 
2920
static void
2921
si_write_harvested_raster_configs(struct si_context *sctx,
2922
				  struct si_pm4_state *pm4,
2923
				  unsigned raster_config)
2924
{
2925
	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
2926
	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
2927
	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
2928
	unsigned num_rb = sctx->screen->b.info.r600_num_backends;
2929
	unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
2930
	unsigned rb_per_se = num_rb / num_se;
2931
	unsigned se0_mask = (1 << rb_per_se) - 1;
2932
	unsigned se1_mask = se0_mask << rb_per_se;
2933
	unsigned se;
2934
 
2935
	assert(num_se == 1 || num_se == 2);
2936
	assert(sh_per_se == 1 || sh_per_se == 2);
2937
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);
2938
 
2939
	/* XXX: I can't figure out what the *_XSEL and *_YSEL
2940
	 * fields are for, so I'm leaving them as their default
2941
	 * values. */
2942
 
2943
	se0_mask &= rb_mask;
2944
	se1_mask &= rb_mask;
2945
	if (num_se == 2 && (!se0_mask || !se1_mask)) {
2946
		raster_config &= C_028350_SE_MAP;
2947
 
2948
		if (!se0_mask) {
2949
			raster_config |=
2950
				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
2951
		} else {
2952
			raster_config |=
2953
				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
2954
		}
2955
	}
2956
 
2957
	for (se = 0; se < num_se; se++) {
2958
		unsigned raster_config_se = raster_config;
2959
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
2960
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
2961
 
2962
		pkr0_mask &= rb_mask;
2963
		pkr1_mask &= rb_mask;
2964
		if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) {
2965
			raster_config_se &= C_028350_PKR_MAP;
2966
 
2967
			if (!pkr0_mask) {
2968
				raster_config_se |=
2969
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
2970
			} else {
2971
				raster_config_se |=
2972
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
2973
			}
2974
		}
2975
 
2976
		if (rb_per_pkr == 2) {
2977
			unsigned rb0_mask = 1 << (se * rb_per_se);
2978
			unsigned rb1_mask = rb0_mask << 1;
2979
 
2980
			rb0_mask &= rb_mask;
2981
			rb1_mask &= rb_mask;
2982
			if (!rb0_mask || !rb1_mask) {
2983
				raster_config_se &= C_028350_RB_MAP_PKR0;
2984
 
2985
				if (!rb0_mask) {
2986
					raster_config_se |=
2987
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
2988
				} else {
2989
					raster_config_se |=
2990
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
2991
				}
2992
			}
2993
 
2994
			if (sh_per_se == 2) {
2995
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
2996
				rb1_mask = rb0_mask << 1;
2997
				rb0_mask &= rb_mask;
2998
				rb1_mask &= rb_mask;
2999
				if (!rb0_mask || !rb1_mask) {
3000
					raster_config_se &= C_028350_RB_MAP_PKR1;
3001
 
3002
					if (!rb0_mask) {
3003
						raster_config_se |=
3004
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
3005
					} else {
3006
						raster_config_se |=
3007
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
3008
					}
3009
				}
3010
			}
3011
		}
3012
 
3013
		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3014
			       SE_INDEX(se) | SH_BROADCAST_WRITES |
3015
			       INSTANCE_BROADCAST_WRITES);
3016
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
3017
	}
3018
 
3019
	si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3020
		       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
3021
		       INSTANCE_BROADCAST_WRITES);
3022
}
3023
 
3024
void si_init_config(struct si_context *sctx)
3025
{
3026
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
3027
 
3028
	if (pm4 == NULL)
3029
		return;
3030
 
3031
	si_cmd_context_control(pm4);
3032
 
3033
	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
3034
	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
3035
 
3036
	/* FIXME calculate these values somehow ??? */
3037
	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
3038
	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3039
	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3040
 
3041
	si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
3042
	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3043
	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
3044
	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3045
 
3046
	si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
3047
	si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
3048
	si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
3049
	si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
3050
 
3051
	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3052
	si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
3053
	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3054
	if (sctx->b.chip_class < CIK)
3055
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3056
			       S_008A14_CLIP_VTX_REORDER_ENA(1));
3057
 
3058
	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3059
	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3060
 
3061
	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3062
 
3063
	if (sctx->b.chip_class >= CIK) {
3064
		switch (sctx->screen->b.family) {
3065
		case CHIP_BONAIRE:
3066
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
3067
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
3068
			break;
3069
		case CHIP_HAWAII:
3070
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
3071
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
3072
			break;
3073
		case CHIP_KAVERI:
3074
			/* XXX todo */
3075
		case CHIP_KABINI:
3076
			/* XXX todo */
3077
		case CHIP_MULLINS:
3078
			/* XXX todo */
3079
		default:
3080
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0);
3081
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
3082
			break;
3083
		}
3084
	} else {
3085
		unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
3086
		unsigned num_rb = sctx->screen->b.info.r600_num_backends;
3087
		unsigned raster_config;
3088
 
3089
		switch (sctx->screen->b.family) {
3090
		case CHIP_TAHITI:
3091
		case CHIP_PITCAIRN:
3092
			raster_config = 0x2a00126a;
3093
			break;
3094
		case CHIP_VERDE:
3095
			raster_config = 0x0000124a;
3096
			break;
3097
		case CHIP_OLAND:
3098
			raster_config = 0x00000082;
3099
			break;
3100
		case CHIP_HAINAN:
3101
			raster_config = 0;
3102
			break;
3103
		default:
3104
			fprintf(stderr,
3105
				"radeonsi: Unknown GPU, using 0 for raster_config\n");
3106
			raster_config = 0;
3107
			break;
3108
		}
3109
 
3110
		/* Always use the default config when all backends are enabled
3111
		 * (or when we failed to determine the enabled backends).
3112
		 */
3113
		if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
3114
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
3115
				       raster_config);
3116
		} else {
3117
			si_write_harvested_raster_configs(sctx, pm4, raster_config);
3118
		}
3119
	}
3120
 
3121
	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
3122
	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
3123
	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
3124
		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
3125
	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
3126
	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
3127
		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
3128
 
3129
	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3130
	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
3131
	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
3132
	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
3133
	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
3134
	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
3135
	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
3136
	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
3137
	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
3138
	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
3139
	si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
3140
	si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0);
3141
	si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0);
3142
	si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
3143
	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
3144
	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
3145
	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
3146
 
3147
	/* There is a hang if stencil is used and fast stencil is enabled
3148
	 * regardless of whether HTILE is depth-only or not.
3149
	 */
3150
	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
3151
		       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
3152
		       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
3153
		       S_02800C_FAST_STENCIL_DISABLE(1));
3154
 
3155
	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
3156
	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
3157
	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
3158
 
3159
	if (sctx->b.chip_class >= CIK) {
3160
		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
3161
		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
3162
		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
3163
	}
3164
 
3165
	sctx->init_config = pm4;
3166
}