Subversion Repositories Kolibri OS

Rev

Rev 2005 | Rev 3192 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 2005 Rev 2997
Line 22... Line 22...
22
 *
22
 *
23
 * Authors:
23
 * Authors:
24
 *     Alex Deucher 
24
 *     Alex Deucher 
25
 */
25
 */
Line 26... Line 26...
26
 
26
 
27
#include "drmP.h"
-
 
28
#include "drm.h"
27
#include 
29
#include "radeon_drm.h"
28
#include 
Line 30... Line 29...
30
#include "radeon.h"
29
#include "radeon.h"
31
 
30
 
32
#include "evergreend.h"
31
#include "evergreend.h"
33
#include "evergreen_blit_shaders.h"
-
 
34
#include "cayman_blit_shaders.h"
-
 
35
 
-
 
36
#define DI_PT_RECTLIST        0x11
32
#include "evergreen_blit_shaders.h"
37
#define DI_INDEX_SIZE_16_BIT  0x0
-
 
38
#define DI_SRC_SEL_AUTO_INDEX 0x2
-
 
39
 
-
 
40
#define FMT_8                 0x1
-
 
41
#define FMT_5_6_5             0x8
-
 
42
#define FMT_8_8_8_8           0x1a
-
 
43
#define COLOR_8               0x1
-
 
Line 44... Line 33...
44
#define COLOR_5_6_5           0x8
33
#include "cayman_blit_shaders.h"
45
#define COLOR_8_8_8_8         0x1a
34
#include "radeon_blit_common.h"
46
 
35
 
47
/* emits 17 */
36
/* emits 17 */
48
static void
37
static void
-
 
38
set_render_target(struct radeon_device *rdev, int format,
49
set_render_target(struct radeon_device *rdev, int format,
39
		  int w, int h, u64 gpu_addr)
50
		  int w, int h, u64 gpu_addr)
40
{
Line 51... Line 41...
51
{
41
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
52
	u32 cb_color_info;
42
	u32 cb_color_info;
53
	int pitch, slice;
43
	int pitch, slice;
Line 54... Line 44...
54
 
44
 
-
 
45
	h = ALIGN(h, 8);
-
 
46
	if (h < 8)
55
	h = ALIGN(h, 8);
47
		h = 8;
56
	if (h < 8)
48
 
Line 57... Line 49...
57
		h = 8;
49
	cb_color_info = CB_FORMAT(format) |
58
 
50
		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
59
	cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));
51
		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
60
	pitch = (w / 8) - 1;
52
	pitch = (w / 8) - 1;
61
	slice = ((w * h) / 64) - 1;
53
	slice = ((w * h) / 64) - 1;
62
 
54
 
63
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
55
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
64
	radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
56
	radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
65
	radeon_ring_write(rdev, gpu_addr >> 8);
57
	radeon_ring_write(ring, gpu_addr >> 8);
66
	radeon_ring_write(rdev, pitch);
58
	radeon_ring_write(ring, pitch);
67
	radeon_ring_write(rdev, slice);
59
	radeon_ring_write(ring, slice);
68
	radeon_ring_write(rdev, 0);
60
	radeon_ring_write(ring, 0);
69
	radeon_ring_write(rdev, cb_color_info);
61
	radeon_ring_write(ring, cb_color_info);
70
	radeon_ring_write(rdev, (1 << 4));
62
	radeon_ring_write(ring, 0);
71
	radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
63
	radeon_ring_write(ring, (w - 1) | ((h - 1) << 16));
72
	radeon_ring_write(rdev, 0);
64
	radeon_ring_write(ring, 0);
73
	radeon_ring_write(rdev, 0);
65
	radeon_ring_write(ring, 0);
74
	radeon_ring_write(rdev, 0);
66
	radeon_ring_write(ring, 0);
Line 75... Line 67...
75
	radeon_ring_write(rdev, 0);
67
	radeon_ring_write(ring, 0);
76
	radeon_ring_write(rdev, 0);
68
	radeon_ring_write(ring, 0);
77
	radeon_ring_write(rdev, 0);
69
	radeon_ring_write(ring, 0);
78
	radeon_ring_write(rdev, 0);
70
	radeon_ring_write(ring, 0);
79
	radeon_ring_write(rdev, 0);
71
	radeon_ring_write(ring, 0);
80
}
72
}
-
 
73
 
81
 
74
/* emits 5dw */
Line 82... Line 75...
82
/* emits 5dw */
75
static void
83
static void
76
cp_set_surface_sync(struct radeon_device *rdev,
84
cp_set_surface_sync(struct radeon_device *rdev,
77
		    u32 sync_type, u32 size,
85
		    u32 sync_type, u32 size,
78
		    u64 mc_addr)
Line -... Line 79...
-
 
79
{
-
 
80
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-
 
81
	u32 cp_coher_size;
-
 
82
 
-
 
83
	if (size == 0xffffffff)
-
 
84
		cp_coher_size = 0xffffffff;
-
 
85
	else
-
 
86
		cp_coher_size = ((size + 255) >> 8);
-
 
87
 
86
		    u64 mc_addr)
88
	if (rdev->family >= CHIP_CAYMAN) {
87
{
89
		/* CP_COHER_CNTL2 has to be set manually when submitting a surface_sync
88
	u32 cp_coher_size;
90
		 * to the RB directly. For IBs, the CP programs this as part of the
89
 
91
		 * surface_sync packet.
90
	if (size == 0xffffffff)
92
		 */
91
		cp_coher_size = 0xffffffff;
93
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
Line 92... Line 94...
92
	else
94
		radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2);
93
		cp_coher_size = ((size + 255) >> 8);
95
		radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */
94
 
96
	}
95
	radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
97
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
-
 
98
	radeon_ring_write(ring, sync_type);
96
	radeon_ring_write(rdev, sync_type);
99
	radeon_ring_write(ring, cp_coher_size);
Line 97... Line 100...
97
	radeon_ring_write(rdev, cp_coher_size);
100
	radeon_ring_write(ring, mc_addr >> 8);
98
	radeon_ring_write(rdev, mc_addr >> 8);
101
	radeon_ring_write(ring, 10); /* poll interval */
99
	radeon_ring_write(rdev, 10); /* poll interval */
102
}
100
}
103
 
101
 
104
/* emits 11dw + 1 surface sync = 16dw */
102
/* emits 11dw + 1 surface sync = 16dw */
105
static void
103
static void
106
set_shaders(struct radeon_device *rdev)
Line 104... Line 107...
104
set_shaders(struct radeon_device *rdev)
107
{
105
{
108
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
106
	u64 gpu_addr;
109
	u64 gpu_addr;
107
 
110
 
108
	/* VS */
111
	/* VS */
109
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
112
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
110
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
113
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
111
	radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
114
	radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
Line 112... Line 115...
112
	radeon_ring_write(rdev, gpu_addr >> 8);
115
	radeon_ring_write(ring, gpu_addr >> 8);
113
	radeon_ring_write(rdev, 2);
116
	radeon_ring_write(ring, 2);
114
	radeon_ring_write(rdev, 0);
117
	radeon_ring_write(ring, 0);
Line 115... Line 118...
115
 
118
 
116
	/* PS */
119
	/* PS */
117
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
120
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
118
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
121
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
-
 
122
	radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
119
	radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
123
	radeon_ring_write(ring, gpu_addr >> 8);
Line 120... Line 124...
120
	radeon_ring_write(rdev, gpu_addr >> 8);
124
	radeon_ring_write(ring, 1);
121
	radeon_ring_write(rdev, 1);
125
	radeon_ring_write(ring, 0);
-
 
126
	radeon_ring_write(ring, 2);
122
	radeon_ring_write(rdev, 0);
127
 
123
	radeon_ring_write(rdev, 2);
128
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
124
 
129
	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
125
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
130
}
126
	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
131
 
-
 
132
/* emits 10 + 1 sync (5) = 15 */
-
 
133
static void
-
 
134
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
127
}
135
{
128
 
136
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
129
/* emits 10 + 1 sync (5) = 15 */
137
	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
130
static void
138
 
131
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
139
	/* high addr, stride */
132
{
140
	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
133
	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
141
		SQ_VTXC_STRIDE(16);
134
 
142
#ifdef __BIG_ENDIAN
135
	/* high addr, stride */
143
	sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
136
	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
144
#endif
137
#ifdef __BIG_ENDIAN
145
	/* xyzw swizzles */
Line 138... Line 146...
138
	sq_vtx_constant_word2 |= (2 << 30);
146
	sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
139
#endif
147
		SQ_VTCX_SEL_Y(SQ_SEL_Y) |
140
	/* xyzw swizzles */
148
		SQ_VTCX_SEL_Z(SQ_SEL_Z) |
141
	sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);
149
		SQ_VTCX_SEL_W(SQ_SEL_W);
Line 166... Line 174...
166
 
174
 
167
/* emits 10 */
175
/* emits 10 */
168
static void
176
static void
169
set_tex_resource(struct radeon_device *rdev,
177
set_tex_resource(struct radeon_device *rdev,
170
		 int format, int w, int h, int pitch,
178
		 int format, int w, int h, int pitch,
171
		 u64 gpu_addr)
179
		 u64 gpu_addr, u32 size)
-
 
180
{
172
{
181
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
173
	u32 sq_tex_resource_word0, sq_tex_resource_word1;
182
	u32 sq_tex_resource_word0, sq_tex_resource_word1;
Line 174... Line 183...
174
	u32 sq_tex_resource_word4, sq_tex_resource_word7;
183
	u32 sq_tex_resource_word4, sq_tex_resource_word7;
175
 
184
 
Line 176... Line 185...
176
	if (h < 1)
185
	if (h < 1)
177
		h = 1;
186
		h = 1;
178
 
187
 
179
	sq_tex_resource_word0 = (1 << 0); /* 2D */
188
	sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
-
 
189
	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
180
	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
190
				  ((w - 1) << 18));
181
				  ((w - 1) << 18));
191
	sq_tex_resource_word1 = ((h - 1) << 0) |
-
 
192
				TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
-
 
193
	/* xyzw swizzles */
-
 
194
	sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
Line -... Line 195...
-
 
195
				TEX_DST_SEL_Y(SQ_SEL_Y) |
182
	sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);
196
				TEX_DST_SEL_Z(SQ_SEL_Z) |
Line -... Line 197...
-
 
197
				TEX_DST_SEL_W(SQ_SEL_W);
-
 
198
 
-
 
199
	sq_tex_resource_word7 = format |
183
	/* xyzw swizzles */
200
		S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
184
	sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
201
 
185
 
202
	cp_set_surface_sync(rdev,
186
	sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);
203
			    PACKET3_TC_ACTION_ENA, size, gpu_addr);
187
 
204
 
188
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
205
	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8));
189
	radeon_ring_write(rdev, 0);
206
	radeon_ring_write(ring, 0);
190
	radeon_ring_write(rdev, sq_tex_resource_word0);
207
	radeon_ring_write(ring, sq_tex_resource_word0);
191
	radeon_ring_write(rdev, sq_tex_resource_word1);
208
	radeon_ring_write(ring, sq_tex_resource_word1);
192
	radeon_ring_write(rdev, gpu_addr >> 8);
209
	radeon_ring_write(ring, gpu_addr >> 8);
193
	radeon_ring_write(rdev, gpu_addr >> 8);
210
	radeon_ring_write(ring, gpu_addr >> 8);
Line 194... Line 211...
194
	radeon_ring_write(rdev, sq_tex_resource_word4);
211
	radeon_ring_write(ring, sq_tex_resource_word4);
195
	radeon_ring_write(rdev, 0);
212
	radeon_ring_write(ring, 0);
196
	radeon_ring_write(rdev, 0);
213
	radeon_ring_write(ring, 0);
197
	radeon_ring_write(rdev, sq_tex_resource_word7);
214
	radeon_ring_write(ring, sq_tex_resource_word7);
198
}
215
}
-
 
216
 
199
 
217
/* emits 12 */
200
/* emits 12 */
218
static void
201
static void
219
set_scissors(struct radeon_device *rdev, int x1, int y1,
202
set_scissors(struct radeon_device *rdev, int x1, int y1,
220
	     int x2, int y2)
203
	     int x2, int y2)
221
{
204
{
222
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
205
	/* workaround some hw bugs */
223
	/* workaround some hw bugs */
206
	if (x2 == 0)
224
	if (x2 == 0)
207
		x1 = 1;
225
		x1 = 1;
Line 208... Line 226...
208
	if (y2 == 0)
226
	if (y2 == 0)
209
		y1 = 1;
227
		y1 = 1;
210
	if (rdev->family == CHIP_CAYMAN) {
228
	if (rdev->family >= CHIP_CAYMAN) {
211
		if ((x2 == 1) && (y2 == 1))
229
		if ((x2 == 1) && (y2 == 1))
212
			x2 = 2;
230
			x2 = 2;
213
	}
231
	}
214
 
232
 
215
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
233
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
216
	radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
234
	radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
217
	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
235
	radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
218
	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
236
	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
219
 
237
 
220
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
238
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
221
	radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
239
	radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
222
	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
240
	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
Line 223... Line 241...
223
	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
241
	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
224
 
242
 
225
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
243
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
226
	radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
244
	radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
-
 
245
	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
227
	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
246
	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
228
	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
247
}
229
}
248
 
Line 230... Line 249...
230
 
249
/* emits 10 */
231
/* emits 10 */
250
static void
232
static void
251
draw_auto(struct radeon_device *rdev)
233
draw_auto(struct radeon_device *rdev)
252
{
234
{
253
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
235
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
254
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
Line 236... Line 255...
236
	radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
255
	radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
237
	radeon_ring_write(rdev, DI_PT_RECTLIST);
256
	radeon_ring_write(ring, DI_PT_RECTLIST);
Line 238... Line 257...
238
 
257
 
239
	radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
258
	radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
240
	radeon_ring_write(rdev,
259
	radeon_ring_write(ring,
Line 241... Line 260...
241
#ifdef __BIG_ENDIAN
260
#ifdef __BIG_ENDIAN
Line 242... Line 261...
242
			  (2 << 2) |
261
			  (2 << 2) |
243
#endif
262
#endif
244
			  DI_INDEX_SIZE_16_BIT);
263
			  DI_INDEX_SIZE_16_BIT);
245
 
264
 
-
 
265
	radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
246
	radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
266
	radeon_ring_write(ring, 1);
247
	radeon_ring_write(rdev, 1);
267
 
248
 
268
	radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
249
	radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
269
	radeon_ring_write(ring, 3);
250
	radeon_ring_write(rdev, 3);
270
	radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);
Line 267... Line 287...
267
	int num_hs_stack_entries, num_ls_stack_entries;
287
	int num_hs_stack_entries, num_ls_stack_entries;
268
	u64 gpu_addr;
288
	u64 gpu_addr;
269
	int dwords;
289
	int dwords;
Line 270... Line 290...
270
 
290
 
271
	/* set clear context state */
291
	/* set clear context state */
272
	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
292
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
Line 273... Line 293...
273
	radeon_ring_write(rdev, 0);
293
	radeon_ring_write(ring, 0);
274
 
294
 
275
	if (rdev->family < CHIP_CAYMAN) {
295
	if (rdev->family < CHIP_CAYMAN) {
276
		switch (rdev->family) {
296
		switch (rdev->family) {
Line 525... Line 545...
525
					    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
545
					    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
526
		sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
546
		sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
527
					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
547
					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
Line 528... Line 548...
528
 
548
 
529
		/* disable dyn gprs */
549
		/* disable dyn gprs */
530
		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
550
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
531
		radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
551
		radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
Line 532... Line 552...
532
		radeon_ring_write(rdev, 0);
552
		radeon_ring_write(ring, 0);
533
 
553
 
534
		/* setup LDS */
554
		/* setup LDS */
535
		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
555
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
Line 536... Line 556...
536
		radeon_ring_write(rdev, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
556
		radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
537
		radeon_ring_write(rdev, 0x10001000);
557
		radeon_ring_write(ring, 0x10001000);
538
 
558
 
539
		/* SQ config */
559
		/* SQ config */
540
		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
560
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11));
541
		radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
561
		radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
542
		radeon_ring_write(rdev, sq_config);
562
		radeon_ring_write(ring, sq_config);
543
		radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
563
		radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
544
		radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
564
		radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
545
		radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
565
		radeon_ring_write(ring, sq_gpr_resource_mgmt_3);
546
		radeon_ring_write(rdev, 0);
566
		radeon_ring_write(ring, 0);
547
		radeon_ring_write(rdev, 0);
567
		radeon_ring_write(ring, 0);
548
		radeon_ring_write(rdev, sq_thread_resource_mgmt);
568
		radeon_ring_write(ring, sq_thread_resource_mgmt);
549
		radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
569
		radeon_ring_write(ring, sq_thread_resource_mgmt_2);
550
		radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
570
		radeon_ring_write(ring, sq_stack_resource_mgmt_1);
Line 551... Line 571...
551
		radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
571
		radeon_ring_write(ring, sq_stack_resource_mgmt_2);
552
		radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
572
		radeon_ring_write(ring, sq_stack_resource_mgmt_3);
553
	}
573
	}
554
 
574
 
Line 555... Line 575...
555
	/* CONTEXT_CONTROL */
575
	/* CONTEXT_CONTROL */
556
	radeon_ring_write(rdev, 0xc0012800);
576
	radeon_ring_write(ring, 0xc0012800);
557
	radeon_ring_write(rdev, 0x80000000);
577
	radeon_ring_write(ring, 0x80000000);
558
	radeon_ring_write(rdev, 0x80000000);
578
	radeon_ring_write(ring, 0x80000000);
559
 
579
 
Line 560... Line 580...
560
	/* SQ_VTX_BASE_VTX_LOC */
580
	/* SQ_VTX_BASE_VTX_LOC */
561
	radeon_ring_write(rdev, 0xc0026f00);
581
	radeon_ring_write(ring, 0xc0026f00);
562
	radeon_ring_write(rdev, 0x00000000);
582
	radeon_ring_write(ring, 0x00000000);
563
	radeon_ring_write(rdev, 0x00000000);
583
	radeon_ring_write(ring, 0x00000000);
564
	radeon_ring_write(rdev, 0x00000000);
584
	radeon_ring_write(ring, 0x00000000);
565
 
585
 
Line 566... Line 586...
566
	/* SET_SAMPLER */
586
	/* SET_SAMPLER */
567
	radeon_ring_write(rdev, 0xc0036e00);
587
	radeon_ring_write(ring, 0xc0036e00);
568
	radeon_ring_write(rdev, 0x00000000);
588
	radeon_ring_write(ring, 0x00000000);
Line 569... Line 589...
569
	radeon_ring_write(rdev, 0x00000012);
589
	radeon_ring_write(ring, 0x00000012);
570
	radeon_ring_write(rdev, 0x00000000);
590
	radeon_ring_write(ring, 0x00000000);
571
	radeon_ring_write(rdev, 0x00000000);
591
	radeon_ring_write(ring, 0x00000000);
572
 
592
 
573
	/* set to DX10/11 mode */
593
	/* set to DX10/11 mode */
574
	radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0));
594
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
575
	radeon_ring_write(rdev, 1);
595
	radeon_ring_write(ring, 1);
Line 576... Line 596...
576
 
596
 
Line 577... Line -...
577
	/* emit an IB pointing at default state */
-
 
578
	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
-
 
579
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
-
 
580
	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-
 
581
	radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
-
 
582
	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
-
 
583
	radeon_ring_write(rdev, dwords);
-
 
584
 
-
 
585
}
-
 
586
 
-
 
587
static inline uint32_t i2f(uint32_t input)
-
 
588
{
-
 
589
	u32 result, i, exponent, fraction;
-
 
590
 
-
 
591
	if ((input & 0x3fff) == 0)
-
 
592
		result = 0; /* 0 is a special case */
-
 
593
	else {
-
 
594
		exponent = 140; /* exponent biased by 127; */
-
 
595
		fraction = (input & 0x3fff) << 10; /* cheat and only
-
 
596
						      handle numbers below 2^^15 */
-
 
597
		for (i = 0; i < 14; i++) {
-
 
598
			if (fraction & 0x800000)
-
 
599
				break;
-
 
600
			else {
-
 
601
				fraction = fraction << 1; /* keep
-
 
602
							     shifting left until top bit = 1 */
597
	/* emit an IB pointing at default state */
603
				exponent = exponent - 1;
598
	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
604
			}
599
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
605
		}
600
	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
606
		result = exponent << 23 | (fraction & 0x7fffff); /* mask
601
	radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC);
607
								    off top bit; assumed 1 */
602
	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
608
	}
603
	radeon_ring_write(ring, dwords);
-
 
604
 
-
 
605
}
-
 
606
 
-
 
607
int evergreen_blit_init(struct radeon_device *rdev)
-
 
608
{
-
 
609
	u32 obj_size;
-
 
610
	int i, r, dwords;
-
 
611
	void *ptr;
-
 
612
	u32 packet2s[16];
-
 
613
	int num_packet2s = 0;
-
 
614
#if 0
-
 
615
	rdev->r600_blit.primitives.set_render_target = set_render_target;
-
 
616
	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
-
 
617
	rdev->r600_blit.primitives.set_shaders = set_shaders;
-
 
618
	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
-
 
619
	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
-
 
620
	rdev->r600_blit.primitives.set_scissors = set_scissors;
-
 
621
	rdev->r600_blit.primitives.draw_auto = draw_auto;
-
 
622
	rdev->r600_blit.primitives.set_default_state = set_default_state;
-
 
623
 
-
 
624
	rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
Line 609... Line 625...
609
	return result;
625
	rdev->r600_blit.ring_size_common += 55; /* shaders + def state */
610
}
626
	rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
611
 
627
	rdev->r600_blit.ring_size_common += 5; /* done copy */
Line 708... Line 724...
708
	if (r) {
724
	if (r) {
709
		dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
725
		dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
710
		return r;
726
		return r;
711
	}
727
	}
712
//   radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
728
//   radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
713
	return 0;
-
 
714
}
-
 
715
 
-
 
716
void evergreen_blit_fini(struct radeon_device *rdev)
-
 
717
{
-
 
718
	int r;
-
 
719
 
-
 
720
//   radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-
 
721
	if (rdev->r600_blit.shader_obj == NULL)
-
 
722
		return;
-
 
723
	/* If we can't reserve the bo, unref should be enough to destroy
-
 
724
	 * it when it becomes idle.
-
 
725
	 */
-
 
726
	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-
 
727
	if (!r) {
-
 
728
		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-
 
729
		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-
 
730
	}
-
 
731
	radeon_bo_unref(&rdev->r600_blit.shader_obj);
-
 
732
}
-
 
733
 
-
 
734
static int evergreen_vb_ib_get(struct radeon_device *rdev)
-
 
735
{
-
 
736
	int r;
-
 
737
	r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
-
 
738
	if (r) {
-
 
739
		DRM_ERROR("failed to get IB for vertex buffer\n");
-
 
740
		return r;
-
 
741
	}
-
 
742
 
-
 
743
	rdev->r600_blit.vb_total = 64*1024;
-
 
744
	rdev->r600_blit.vb_used = 0;
-
 
745
	return 0;
-
 
746
}
-
 
747
 
-
 
748
static void evergreen_vb_ib_put(struct radeon_device *rdev)
-
 
749
{
-
 
750
	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
-
 
751
	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-
 
752
}
-
 
753
 
-
 
754
int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
-
 
755
{
-
 
756
	int r;
-
 
757
	int ring_size, line_size;
-
 
758
	int max_size;
-
 
759
	/* loops of emits + fence emit possible */
-
 
760
	int dwords_per_loop = 74, num_loops;
-
 
761
 
-
 
762
	r = evergreen_vb_ib_get(rdev);
-
 
763
	if (r)
-
 
764
		return r;
-
 
765
 
-
 
766
	/* 8 bpp vs 32 bpp for xfer unit */
-
 
767
	if (size_bytes & 3)
-
 
768
		line_size = 8192;
-
 
769
	else
-
 
770
		line_size = 8192 * 4;
-
 
771
 
-
 
772
	max_size = 8192 * line_size;
-
 
Line 773... Line -...
773
 
-
 
774
	/* major loops cover the max size transfer */
-
 
775
	num_loops = ((size_bytes + max_size) / max_size);
-
 
776
	/* minor loops cover the extra non aligned bits */
-
 
777
	num_loops += ((size_bytes % line_size) ? 1 : 0);
-
 
778
	/* calculate number of loops correctly */
-
 
779
	ring_size = num_loops * dwords_per_loop;
-
 
780
	/* set default  + shaders */
-
 
781
	ring_size += 55; /* shaders + def state */
-
 
782
	ring_size += 10; /* fence emit for VB IB */
-
 
783
	ring_size += 5; /* done copy */
-
 
784
	ring_size += 10; /* fence emit for done copy */
-
 
785
	r = radeon_ring_lock(rdev, ring_size);
729
 
786
	if (r)
-
 
Line 787... Line -...
787
		return r;
-
 
788
 
-
 
789
	set_default_state(rdev); /* 36 */
730
#endif
790
	set_shaders(rdev); /* 16 */
731
 
791
	return 0;
-
 
792
}
-
 
793
 
-
 
794
void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
-
 
795
{
-
 
796
	int r;
-
 
797
 
-
 
798
	if (rdev->r600_blit.vb_ib)
-
 
799
		evergreen_vb_ib_put(rdev);
-
 
800
 
-
 
801
	if (fence)
-
 
802
		r = radeon_fence_emit(rdev, fence);
-
 
803
 
-
 
804
	radeon_ring_unlock_commit(rdev);
-
 
805
}
-
 
806
 
-
 
807
void evergreen_kms_blit_copy(struct radeon_device *rdev,
-
 
808
			     u64 src_gpu_addr, u64 dst_gpu_addr,
-
 
809
			     int size_bytes)
-
 
810
{
-
 
811
	int max_bytes;
-
 
812
	u64 vb_gpu_addr;
-
 
813
	u32 *vb;
-
 
814
 
-
 
815
	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
-
 
816
		  size_bytes, rdev->r600_blit.vb_used);
-
 
817
	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
-
 
818
	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
-
 
819
		max_bytes = 8192;
-
 
820
 
-
 
821
		while (size_bytes) {
-
 
822
			int cur_size = size_bytes;
-
 
823
			int src_x = src_gpu_addr & 255;
-
 
824
			int dst_x = dst_gpu_addr & 255;
-
 
825
			int h = 1;
-
 
826
			src_gpu_addr = src_gpu_addr & ~255ULL;
-
 
827
			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
 
828
 
-
 
829
			if (!src_x && !dst_x) {
-
 
830
				h = (cur_size / max_bytes);
-
 
831
				if (h > 8192)
-
 
832
					h = 8192;
-
 
833
				if (h == 0)
-
 
834
					h = 1;
-
 
835
				else
-
 
836
					cur_size = max_bytes;
-
 
837
			} else {
-
 
838
				if (cur_size > max_bytes)
-
 
839
					cur_size = max_bytes;
-
 
840
				if (cur_size > (max_bytes - dst_x))
-
 
841
					cur_size = (max_bytes - dst_x);
-
 
842
				if (cur_size > (max_bytes - src_x))
-
 
843
					cur_size = (max_bytes - src_x);
-
 
844
			}
-
 
845
 
-
 
846
			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-
 
847
            //   WARN_ON(1);
-
 
848
			}
-
 
849
 
-
 
850
			vb[0] = i2f(dst_x);
-
 
851
			vb[1] = 0;
-
 
852
			vb[2] = i2f(src_x);
-
 
853
			vb[3] = 0;
-
 
854
 
-
 
855
			vb[4] = i2f(dst_x);
-
 
856
			vb[5] = i2f(h);
-
 
857
			vb[6] = i2f(src_x);
-
 
858
			vb[7] = i2f(h);
-
 
859
 
-
 
860
			vb[8] = i2f(dst_x + cur_size);
-
 
861
			vb[9] = i2f(h);
-
 
862
			vb[10] = i2f(src_x + cur_size);
-
 
863
			vb[11] = i2f(h);
-
 
864
 
-
 
865
			/* src 10 */
-
 
866
			set_tex_resource(rdev, FMT_8,
-
 
867
					 src_x + cur_size, h, src_x + cur_size,
-
 
868
					 src_gpu_addr);
-
 
869
 
-
 
870
			/* 5 */
-
 
871
			cp_set_surface_sync(rdev,
-
 
872
					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
 
873
 
-
 
874
 
-
 
875
			/* dst 17 */
-
 
876
			set_render_target(rdev, COLOR_8,
-
 
877
					  dst_x + cur_size, h,
-
 
878
					  dst_gpu_addr);
-
 
879
 
-
 
880
			/* scissors 12 */
-
 
881
			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
-
 
882
 
-
 
883
			/* 15 */
-
 
884
			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-
 
885
			set_vtx_resource(rdev, vb_gpu_addr);
-
 
886
 
-
 
887
			/* draw 10 */
-
 
888
			draw_auto(rdev);
-
 
889
 
-
 
890
			/* 5 */
-
 
891
			cp_set_surface_sync(rdev,
-
 
892
					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-
 
893
					    cur_size * h, dst_gpu_addr);
-
 
894
 
-
 
895
			vb += 12;
-
 
896
			rdev->r600_blit.vb_used += 12 * 4;
-
 
897
 
-
 
898
			src_gpu_addr += cur_size * h;
-
 
899
			dst_gpu_addr += cur_size * h;
-
 
900
			size_bytes -= cur_size * h;
-
 
901
		}
-
 
902
	} else {
-
 
903
		max_bytes = 8192 * 4;
-
 
904
 
-
 
905
		while (size_bytes) {
-
 
906
			int cur_size = size_bytes;
-
 
907
			int src_x = (src_gpu_addr & 255);
-
 
908
			int dst_x = (dst_gpu_addr & 255);
-
 
909
			int h = 1;
-
 
910
			src_gpu_addr = src_gpu_addr & ~255ULL;
-
 
911
			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
 
912
 
-
 
913
			if (!src_x && !dst_x) {
-
 
914
				h = (cur_size / max_bytes);
-
 
915
				if (h > 8192)
-
 
916
					h = 8192;
-
 
917
				if (h == 0)
-
 
918
					h = 1;
-
 
919
				else
-
 
920
					cur_size = max_bytes;
-
 
921
			} else {
-
 
922
				if (cur_size > max_bytes)
-
 
923
					cur_size = max_bytes;
-
 
924
				if (cur_size > (max_bytes - dst_x))
-
 
925
					cur_size = (max_bytes - dst_x);
-
 
926
				if (cur_size > (max_bytes - src_x))
-
 
927
					cur_size = (max_bytes - src_x);
-
 
928
			}
-
 
929
 
-
 
930
			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-
 
931
           //        WARN_ON(1);
-
 
932
			}
-
 
933
 
-
 
934
			vb[0] = i2f(dst_x / 4);
-
 
935
			vb[1] = 0;
-
 
936
			vb[2] = i2f(src_x / 4);
-
 
937
			vb[3] = 0;
-
 
938
 
-
 
939
			vb[4] = i2f(dst_x / 4);
-
 
940
			vb[5] = i2f(h);
-
 
941
			vb[6] = i2f(src_x / 4);
-
 
942
			vb[7] = i2f(h);
-
 
943
 
-
 
944
			vb[8] = i2f((dst_x + cur_size) / 4);
-
 
945
			vb[9] = i2f(h);
-
 
946
			vb[10] = i2f((src_x + cur_size) / 4);
-
 
947
			vb[11] = i2f(h);
-
 
948
 
-
 
949
			/* src 10 */
-
 
950
			set_tex_resource(rdev, FMT_8_8_8_8,
-
 
951
					 (src_x + cur_size) / 4,
-
 
952
					 h, (src_x + cur_size) / 4,
-
 
953
					 src_gpu_addr);
-
 
954
			/* 5 */
-
 
955
			cp_set_surface_sync(rdev,
-
 
956
					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
 
957
 
-
 
958
			/* dst 17 */
-
 
959
			set_render_target(rdev, COLOR_8_8_8_8,
-
 
960
					  (dst_x + cur_size) / 4, h,
-
 
961
					  dst_gpu_addr);
-
 
962
 
-
 
963
			/* scissors 12  */
-
 
964
			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
 
965
 
-
 
966
			/* Vertex buffer setup 15 */
-
 
967
			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-
 
968
			set_vtx_resource(rdev, vb_gpu_addr);
-
 
969
 
-
 
970
			/* draw 10 */
-
 
971
			draw_auto(rdev);
-
 
972
 
-
 
973
			/* 5 */
-
 
974
			cp_set_surface_sync(rdev,
-
 
975
					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-
 
976
					    cur_size * h, dst_gpu_addr);
-
 
977
 
-
 
978
			/* 74 ring dwords per loop */
-
 
979
			vb += 12;
-
 
980
			rdev->r600_blit.vb_used += 12 * 4;
-
 
981
 
-
 
982
			src_gpu_addr += cur_size * h;
-
 
983
			dst_gpu_addr += cur_size * h;
-
 
984
			size_bytes -= cur_size * h;
-
 
985
		}
-
 
986
	}
-