Rev 4401 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | |||
33 | #include "main/context.h" |
||
34 | #include "main/blend.h" |
||
35 | #include "main/mtypes.h" |
||
36 | #include "main/samplerobj.h" |
||
37 | #include "program/prog_parameter.h" |
||
38 | |||
39 | #include "intel_mipmap_tree.h" |
||
40 | #include "intel_batchbuffer.h" |
||
41 | #include "intel_tex.h" |
||
42 | #include "intel_fbo.h" |
||
43 | #include "intel_buffer_objects.h" |
||
44 | |||
45 | #include "brw_context.h" |
||
46 | #include "brw_state.h" |
||
47 | #include "brw_defines.h" |
||
48 | #include "brw_wm.h" |
||
49 | |||
50 | GLuint |
||
51 | translate_tex_target(GLenum target) |
||
52 | { |
||
53 | switch (target) { |
||
54 | case GL_TEXTURE_1D: |
||
55 | case GL_TEXTURE_1D_ARRAY_EXT: |
||
56 | return BRW_SURFACE_1D; |
||
57 | |||
58 | case GL_TEXTURE_RECTANGLE_NV: |
||
59 | return BRW_SURFACE_2D; |
||
60 | |||
61 | case GL_TEXTURE_2D: |
||
62 | case GL_TEXTURE_2D_ARRAY_EXT: |
||
63 | case GL_TEXTURE_EXTERNAL_OES: |
||
64 | case GL_TEXTURE_2D_MULTISAMPLE: |
||
65 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: |
||
66 | return BRW_SURFACE_2D; |
||
67 | |||
68 | case GL_TEXTURE_3D: |
||
69 | return BRW_SURFACE_3D; |
||
70 | |||
71 | case GL_TEXTURE_CUBE_MAP: |
||
72 | case GL_TEXTURE_CUBE_MAP_ARRAY: |
||
73 | return BRW_SURFACE_CUBE; |
||
74 | |||
75 | default: |
||
76 | assert(0); |
||
77 | return 0; |
||
78 | } |
||
79 | } |
||
80 | |||
81 | uint32_t |
||
82 | brw_get_surface_tiling_bits(uint32_t tiling) |
||
83 | { |
||
84 | switch (tiling) { |
||
85 | case I915_TILING_X: |
||
86 | return BRW_SURFACE_TILED; |
||
87 | case I915_TILING_Y: |
||
88 | return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; |
||
89 | default: |
||
90 | return 0; |
||
91 | } |
||
92 | } |
||
93 | |||
94 | |||
95 | uint32_t |
||
96 | brw_get_surface_num_multisamples(unsigned num_samples) |
||
97 | { |
||
98 | if (num_samples > 1) |
||
99 | return BRW_SURFACE_MULTISAMPLECOUNT_4; |
||
100 | else |
||
101 | return BRW_SURFACE_MULTISAMPLECOUNT_1; |
||
102 | } |
||
103 | |||
104 | |||
/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 *
 * Builds a base swizzle table (initially the identity mapping), rewrites it
 * for depth textures according to the effective depth mode and for formats
 * that lack some channels, then composes it with the application-requested
 * swizzle (t->_Swizzle).  Returns a packed MAKE_SWIZZLE4 value.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   /* Identity mapping: entry i is what channel i of the sampled texel
    * should read as.  Indexed below via GET_SWZ of the user swizzle.
    */
   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      /* Replicate/zero the single depth channel per the depth mode. */
      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      /* Only override alpha when the backing hardware format actually
       * carries alpha bits that could leak through.
       */
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   /* Compose the user swizzle with the table built above. */
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
||
192 | |||
193 | |||
194 | static void |
||
195 | brw_update_buffer_texture_surface(struct gl_context *ctx, |
||
196 | unsigned unit, |
||
197 | uint32_t *binding_table, |
||
198 | unsigned surf_index) |
||
199 | { |
||
200 | struct brw_context *brw = brw_context(ctx); |
||
201 | struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; |
||
202 | uint32_t *surf; |
||
203 | struct intel_buffer_object *intel_obj = |
||
204 | intel_buffer_object(tObj->BufferObject); |
||
205 | drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL; |
||
206 | gl_format format = tObj->_BufferObjectFormat; |
||
207 | uint32_t brw_format = brw_format_for_mesa_format(format); |
||
208 | int texel_size = _mesa_get_format_bytes(format); |
||
209 | |||
210 | if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) { |
||
211 | _mesa_problem(NULL, "bad format %s for texture buffer\n", |
||
212 | _mesa_get_format_name(format)); |
||
213 | } |
||
214 | |||
215 | surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, |
||
216 | 6 * 4, 32, &binding_table[surf_index]); |
||
217 | |||
218 | surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | |
||
219 | (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT)); |
||
220 | |||
221 | if (brw->gen >= 6) |
||
222 | surf[0] |= BRW_SURFACE_RC_READ_WRITE; |
||
223 | |||
224 | if (bo) { |
||
225 | surf[1] = bo->offset; /* reloc */ |
||
226 | |||
227 | /* Emit relocation to surface contents. */ |
||
228 | drm_intel_bo_emit_reloc(brw->batch.bo, |
||
229 | binding_table[surf_index] + 4, |
||
230 | bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); |
||
231 | |||
232 | int w = intel_obj->Base.Size / texel_size; |
||
233 | surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT | |
||
234 | ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT); |
||
235 | surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | |
||
236 | (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT); |
||
237 | } else { |
||
238 | surf[1] = 0; |
||
239 | surf[2] = 0; |
||
240 | surf[3] = 0; |
||
241 | } |
||
242 | |||
243 | surf[4] = 0; |
||
244 | surf[5] = 0; |
||
245 | } |
||
246 | |||
/**
 * Emit SURFACE_STATE for the texture bound to texture unit \p unit and
 * record its batch offset in binding_table[surf_index].  Buffer textures
 * are delegated to brw_update_buffer_texture_surface().
 */
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *binding_table,
                           unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;
   uint32_t tile_x, tile_y;

   /* BRW_NEW_UNIFORM_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      /* Buffer textures use the SURFTYPE_BUFFER layout instead. */
      brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   /* DWORD 0: surface type, mipmap layout, cube face enables and format. */
   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   /* DWORD 1: base address = bo offset + miptree offset + the tile-aligned
    * offset of the base level image (remainders come back in tile_x/tile_y).
    */
   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
   surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
                                             &tile_x, &tile_y);

   /* DWORD 2: mip count (relative to base level) and dimensions minus one. */
   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* DWORD 3: tiling mode, depth and pitch (both minus one). */
   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) <<
              BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Emit relocation to surface contents; the delta is surf[1] minus the
    * bo's presumed offset, i.e. the intra-bo byte offset computed above.
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           binding_table[surf_index] + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
||
312 | |||
/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 *
 * \param bo          buffer object holding the constant data
 * \param offset      byte offset of the constants within \p bo
 * \param size        size of the constant data, in bytes
 * \param out_offset  receives the batch offset of the emitted SURFACE_STATE
 * \param dword_pitch true selects a 4-byte element stride; false selects the
 *                    16-byte (RGBA32F element) stride
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   /* Round size up to whole elements. */
   uint32_t elements = ALIGN(size, stride) / stride;
   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, out_offset);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   surf[1] = bo->offset + offset; /* reloc */

   /* Element count minus one, split across the width (7 bits), height
    * (13 bits) and depth (7 bits) fields, low bits first.
    */
   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
              ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
||
360 | |||
/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 *
 * \param buffer_obj            GL buffer object backing the output stream
 * \param out_offset            receives the batch offset of the SURFACE_STATE
 * \param num_vector_components components per output (1-4), selects format
 * \param stride_dwords         distance between consecutive outputs, in dwords
 * \param offset_dwords         start of the first output, in dwords
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   uint32_t offset_bytes = 4 * offset_dwords;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   /* Split the entry count across the width (7 bits), height (13 bits)
    * and depth (7 bits) fields, low bits first.
    */
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   /* One R32 float channel per output vector component. */
   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
||
445 | |||
/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
   float *constants;
   unsigned int i;

   /* Refresh any GL-state-derived values in the parameter list before
    * copying them out below.
    */
   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      /* No pull constants needed: release any stale constant bo and clear
       * the surface so state depending on it gets re-emitted.
       */
      if (brw->wm.const_bo) {
         drm_intel_bo_unreference(brw->wm.const_bo);
         brw->wm.const_bo = NULL;
         brw->wm.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   /* Replace the previous constant bo with a freshly allocated one. */
   drm_intel_bo_unreference(brw->wm.const_bo);
   brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                         size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
   constants = brw->wm.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      /* pull_param[] holds pointers into the parameter storage refreshed
       * by _mesa_load_state_parameters() above.
       */
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
                                     &brw->wm.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
||
496 | |||
/* State atom: re-uploads the WM pull constant buffer whenever program
 * constants, the batch, the fragment program, or the compiled WM program
 * change.
 */
const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};
||
505 | |||
/**
 * Emit a null render-target SURFACE_STATE for draw buffer \p unit, used when
 * no actual color buffer is bound at that index.
 */
static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specificially indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into dummy
       * color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      /* Pre-Gen6: mask off all channel writes instead of relying on the
       * null surface type to discard them.
       */
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   /* Only the multisampled workaround path has a real bo to relocate. */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.surf_offset[unit] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}
||
596 | |||
/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   /* Layered rendering is not supported by this (pre-Gen7) path. */
   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc: base address plus the tile-aligned offset of the image
    * (remainders come back in tile_x/tile_y).
    */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR: pre-Gen6 blend/colormask state lives in SURFACE_STATE. */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB (no alpha bits) or alpha writes are masked off.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   /* Emit relocation to surface contents; the delta is the intra-bo tile
    * offset computed into surf[1] above.
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.surf_offset[unit] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}
||
702 | |||
703 | /** |
||
704 | * Construct SURFACE_STATE objects for renderbuffers/draw buffers. |
||
705 | */ |
||
706 | static void |
||
707 | brw_update_renderbuffer_surfaces(struct brw_context *brw) |
||
708 | { |
||
709 | struct gl_context *ctx = &brw->ctx; |
||
710 | GLuint i; |
||
711 | |||
712 | /* _NEW_BUFFERS | _NEW_COLOR */ |
||
713 | /* Update surfaces for drawing buffers */ |
||
714 | if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { |
||
715 | for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { |
||
716 | if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { |
||
717 | brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], |
||
718 | ctx->DrawBuffer->Layered, i); |
||
719 | } else { |
||
720 | brw->vtbl.update_null_renderbuffer_surface(brw, i); |
||
721 | } |
||
722 | } |
||
723 | } else { |
||
724 | brw->vtbl.update_null_renderbuffer_surface(brw, 0); |
||
725 | } |
||
726 | brw->state.dirty.brw |= BRW_NEW_SURFACES; |
||
727 | } |
||
728 | |||
/* State atom (pre-Gen6): render target surfaces also encode blend and
 * color-mask state, hence the _NEW_COLOR dependency.
 */
const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};
||
738 | |||
/* State atom (Gen6+): same emit function, but no _NEW_COLOR dependency
 * since blend/color-mask state no longer lives in SURFACE_STATE.
 */
const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};
||
747 | |||
/**
 * Construct SURFACE_STATE objects for enabled textures.
 *
 * Walks every sampler used by either the vertex or fragment program and
 * emits a texture surface into the corresponding VS/WM binding table slot,
 * zeroing the slots of unused samplers.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
    * Unfortunately, we're stuck using the gl_program structs until the
    * ARB_fragment_program front-end gets converted to GLSL IR.  These
    * have the downside that SamplerUnits is split and only contains the
    * mappings for samplers active in that stage.
    */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* Highest sampler index used by either stage, via find-last-set. */
   unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);

   for (unsigned s = 0; s < num_samplers; s++) {
      /* Clear both slots first so unused samplers end up with no surface. */
      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;

      if (vs->SamplersUsed & (1 << s)) {
         const unsigned unit = vs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->vs.surf_offset,
                                             SURF_INDEX_VS_TEXTURE(s));
         }
      }

      if (fs->SamplersUsed & (1 << s)) {
         const unsigned unit = fs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->wm.surf_offset,
                                             SURF_INDEX_TEXTURE(s));
         }
      }
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
||
796 | |||
/* State atom: rebuild texture SURFACE_STATE whenever texture state, the
 * batch, a uniform-buffer binding, or either program stage changes.
 */
const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};
||
808 | |||
809 | void |
||
810 | brw_upload_ubo_surfaces(struct brw_context *brw, |
||
811 | struct gl_shader *shader, |
||
812 | uint32_t *surf_offsets) |
||
813 | { |
||
814 | struct gl_context *ctx = &brw->ctx; |
||
815 | |||
816 | if (!shader) |
||
817 | return; |
||
818 | |||
819 | for (int i = 0; i < shader->NumUniformBlocks; i++) { |
||
820 | struct gl_uniform_buffer_binding *binding; |
||
821 | struct intel_buffer_object *intel_bo; |
||
822 | |||
823 | binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding]; |
||
824 | intel_bo = intel_buffer_object(binding->BufferObject); |
||
825 | drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ); |
||
826 | |||
827 | /* Because behavior for referencing outside of the binding's size in the |
||
828 | * glBindBufferRange case is undefined, we can just bind the whole buffer |
||
829 | * glBindBufferBase wants and be a correct implementation. |
||
830 | */ |
||
831 | brw->vtbl.create_constant_surface(brw, bo, binding->Offset, |
||
832 | bo->size - binding->Offset, |
||
833 | &surf_offsets[i], |
||
834 | shader->Type == GL_FRAGMENT_SHADER); |
||
835 | } |
||
836 | |||
837 | if (shader->NumUniformBlocks) |
||
838 | brw->state.dirty.brw |= BRW_NEW_SURFACES; |
||
839 | } |
||
840 | |||
841 | static void |
||
842 | brw_upload_wm_ubo_surfaces(struct brw_context *brw) |
||
843 | { |
||
844 | struct gl_context *ctx = &brw->ctx; |
||
845 | /* _NEW_PROGRAM */ |
||
846 | struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; |
||
847 | |||
848 | if (!prog) |
||
849 | return; |
||
850 | |||
851 | brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], |
||
852 | &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]); |
||
853 | } |
||
854 | |||
/* State atom: rebuild WM UBO surfaces when the program or a uniform-buffer
 * binding changes, or when a new batch invalidates previous surface state.
 */
const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};
||
863 | |||
864 | /** |
||
865 | * Constructs the binding table for the WM surface state, which maps unit |
||
866 | * numbers to surface state objects. |
||
867 | */ |
||
868 | static void |
||
869 | brw_upload_wm_binding_table(struct brw_context *brw) |
||
870 | { |
||
871 | uint32_t *bind; |
||
872 | int i; |
||
873 | |||
874 | if (INTEL_DEBUG & DEBUG_SHADER_TIME) { |
||
875 | gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]); |
||
876 | } |
||
877 | |||
878 | /* Might want to calculate nr_surfaces first, to avoid taking up so much |
||
879 | * space for the binding table. |
||
880 | */ |
||
881 | bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, |
||
882 | sizeof(uint32_t) * BRW_MAX_WM_SURFACES, |
||
883 | 32, &brw->wm.bind_bo_offset); |
||
884 | |||
885 | /* BRW_NEW_SURFACES */ |
||
886 | for (i = 0; i < BRW_MAX_WM_SURFACES; i++) { |
||
887 | bind[i] = brw->wm.surf_offset[i]; |
||
888 | } |
||
889 | |||
890 | brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE; |
||
891 | } |
||
892 | |||
/* State atom: regenerate the WM binding table whenever any surface offset
 * changed or a new batch was started.
 */
const struct brw_tracked_state brw_wm_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_SURFACES),
      .cache = 0
   },
   .emit = brw_upload_wm_binding_table,
};
||
902 | |||
903 | void |
||
904 | gen4_init_vtable_surface_functions(struct brw_context *brw) |
||
905 | { |
||
906 | brw->vtbl.update_texture_surface = brw_update_texture_surface; |
||
907 | brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface; |
||
908 | brw->vtbl.update_null_renderbuffer_surface = |
||
909 | brw_update_null_renderbuffer_surface; |
||
910 | brw->vtbl.create_constant_surface = brw_create_constant_surface; |
||
911 | }>>><>><>>>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>>><>><>><>><>><>><>><>=>><>><>><>><>><>><>><>><>><> |