Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | |||
33 | #include "main/context.h" |
||
34 | #include "main/blend.h" |
||
35 | #include "main/mtypes.h" |
||
36 | #include "main/samplerobj.h" |
||
37 | #include "program/prog_parameter.h" |
||
38 | |||
39 | #include "intel_mipmap_tree.h" |
||
40 | #include "intel_batchbuffer.h" |
||
41 | #include "intel_tex.h" |
||
42 | #include "intel_fbo.h" |
||
43 | #include "intel_buffer_objects.h" |
||
44 | |||
45 | #include "brw_context.h" |
||
46 | #include "brw_state.h" |
||
47 | #include "brw_defines.h" |
||
48 | #include "brw_wm.h" |
||
49 | |||
50 | GLuint |
||
51 | translate_tex_target(GLenum target) |
||
52 | { |
||
53 | switch (target) { |
||
54 | case GL_TEXTURE_1D: |
||
55 | case GL_TEXTURE_1D_ARRAY_EXT: |
||
56 | return BRW_SURFACE_1D; |
||
57 | |||
58 | case GL_TEXTURE_RECTANGLE_NV: |
||
59 | return BRW_SURFACE_2D; |
||
60 | |||
61 | case GL_TEXTURE_2D: |
||
62 | case GL_TEXTURE_2D_ARRAY_EXT: |
||
63 | case GL_TEXTURE_EXTERNAL_OES: |
||
64 | case GL_TEXTURE_2D_MULTISAMPLE: |
||
65 | case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: |
||
66 | return BRW_SURFACE_2D; |
||
67 | |||
68 | case GL_TEXTURE_3D: |
||
69 | return BRW_SURFACE_3D; |
||
70 | |||
71 | case GL_TEXTURE_CUBE_MAP: |
||
72 | case GL_TEXTURE_CUBE_MAP_ARRAY: |
||
73 | return BRW_SURFACE_CUBE; |
||
74 | |||
75 | default: |
||
76 | unreachable("not reached"); |
||
77 | } |
||
78 | } |
||
79 | |||
80 | uint32_t |
||
81 | brw_get_surface_tiling_bits(uint32_t tiling) |
||
82 | { |
||
83 | switch (tiling) { |
||
84 | case I915_TILING_X: |
||
85 | return BRW_SURFACE_TILED; |
||
86 | case I915_TILING_Y: |
||
87 | return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; |
||
88 | default: |
||
89 | return 0; |
||
90 | } |
||
91 | } |
||
92 | |||
93 | |||
94 | uint32_t |
||
95 | brw_get_surface_num_multisamples(unsigned num_samples) |
||
96 | { |
||
97 | if (num_samples > 1) |
||
98 | return BRW_SURFACE_MULTISAMPLECOUNT_4; |
||
99 | else |
||
100 | return BRW_SURFACE_MULTISAMPLECOUNT_1; |
||
101 | } |
||
102 | |||
103 | void |
||
104 | brw_configure_w_tiled(const struct intel_mipmap_tree *mt, |
||
105 | bool is_render_target, |
||
106 | unsigned *width, unsigned *height, |
||
107 | unsigned *pitch, uint32_t *tiling, unsigned *format) |
||
108 | { |
||
109 | static const unsigned halign_stencil = 8; |
||
110 | |||
111 | /* In Y-tiling row is twice as wide as in W-tiling, and subsequently |
||
112 | * there are half as many rows. |
||
113 | * In addition, mip-levels are accessed manually by the program and |
||
114 | * therefore the surface is setup to cover all the mip-levels for one slice. |
||
115 | * (Hardware is still used to access individual slices). |
||
116 | */ |
||
117 | *tiling = I915_TILING_Y; |
||
118 | *pitch = mt->pitch * 2; |
||
119 | *width = ALIGN(mt->total_width, halign_stencil) * 2; |
||
120 | *height = (mt->total_height / mt->physical_depth0) / 2; |
||
121 | |||
122 | if (is_render_target) { |
||
123 | *format = BRW_SURFACEFORMAT_R8_UINT; |
||
124 | } |
||
125 | } |
||
126 | |||
127 | |||
128 | /** |
||
129 | * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle |
||
130 | * swizzling. |
||
131 | */ |
||
132 | int |
||
133 | brw_get_texture_swizzle(const struct gl_context *ctx, |
||
134 | const struct gl_texture_object *t) |
||
135 | { |
||
136 | const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; |
||
137 | |||
138 | int swizzles[SWIZZLE_NIL + 1] = { |
||
139 | SWIZZLE_X, |
||
140 | SWIZZLE_Y, |
||
141 | SWIZZLE_Z, |
||
142 | SWIZZLE_W, |
||
143 | SWIZZLE_ZERO, |
||
144 | SWIZZLE_ONE, |
||
145 | SWIZZLE_NIL |
||
146 | }; |
||
147 | |||
148 | if (img->_BaseFormat == GL_DEPTH_COMPONENT || |
||
149 | img->_BaseFormat == GL_DEPTH_STENCIL) { |
||
150 | GLenum depth_mode = t->DepthMode; |
||
151 | |||
152 | /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures |
||
153 | * with depth component data specified with a sized internal format. |
||
154 | * Otherwise, it's left at the old default, GL_LUMINANCE. |
||
155 | */ |
||
156 | if (_mesa_is_gles3(ctx) && |
||
157 | img->InternalFormat != GL_DEPTH_COMPONENT && |
||
158 | img->InternalFormat != GL_DEPTH_STENCIL) { |
||
159 | depth_mode = GL_RED; |
||
160 | } |
||
161 | |||
162 | switch (depth_mode) { |
||
163 | case GL_ALPHA: |
||
164 | swizzles[0] = SWIZZLE_ZERO; |
||
165 | swizzles[1] = SWIZZLE_ZERO; |
||
166 | swizzles[2] = SWIZZLE_ZERO; |
||
167 | swizzles[3] = SWIZZLE_X; |
||
168 | break; |
||
169 | case GL_LUMINANCE: |
||
170 | swizzles[0] = SWIZZLE_X; |
||
171 | swizzles[1] = SWIZZLE_X; |
||
172 | swizzles[2] = SWIZZLE_X; |
||
173 | swizzles[3] = SWIZZLE_ONE; |
||
174 | break; |
||
175 | case GL_INTENSITY: |
||
176 | swizzles[0] = SWIZZLE_X; |
||
177 | swizzles[1] = SWIZZLE_X; |
||
178 | swizzles[2] = SWIZZLE_X; |
||
179 | swizzles[3] = SWIZZLE_X; |
||
180 | break; |
||
181 | case GL_RED: |
||
182 | swizzles[0] = SWIZZLE_X; |
||
183 | swizzles[1] = SWIZZLE_ZERO; |
||
184 | swizzles[2] = SWIZZLE_ZERO; |
||
185 | swizzles[3] = SWIZZLE_ONE; |
||
186 | break; |
||
187 | } |
||
188 | } |
||
189 | |||
190 | GLenum datatype = _mesa_get_format_datatype(img->TexFormat); |
||
191 | |||
192 | /* If the texture's format is alpha-only, force R, G, and B to |
||
193 | * 0.0. Similarly, if the texture's format has no alpha channel, |
||
194 | * force the alpha value read to 1.0. This allows for the |
||
195 | * implementation to use an RGBA texture for any of these formats |
||
196 | * without leaking any unexpected values. |
||
197 | */ |
||
198 | switch (img->_BaseFormat) { |
||
199 | case GL_ALPHA: |
||
200 | swizzles[0] = SWIZZLE_ZERO; |
||
201 | swizzles[1] = SWIZZLE_ZERO; |
||
202 | swizzles[2] = SWIZZLE_ZERO; |
||
203 | break; |
||
204 | case GL_LUMINANCE: |
||
205 | if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) { |
||
206 | swizzles[0] = SWIZZLE_X; |
||
207 | swizzles[1] = SWIZZLE_X; |
||
208 | swizzles[2] = SWIZZLE_X; |
||
209 | swizzles[3] = SWIZZLE_ONE; |
||
210 | } |
||
211 | break; |
||
212 | case GL_LUMINANCE_ALPHA: |
||
213 | if (datatype == GL_SIGNED_NORMALIZED) { |
||
214 | swizzles[0] = SWIZZLE_X; |
||
215 | swizzles[1] = SWIZZLE_X; |
||
216 | swizzles[2] = SWIZZLE_X; |
||
217 | swizzles[3] = SWIZZLE_W; |
||
218 | } |
||
219 | break; |
||
220 | case GL_INTENSITY: |
||
221 | if (datatype == GL_SIGNED_NORMALIZED) { |
||
222 | swizzles[0] = SWIZZLE_X; |
||
223 | swizzles[1] = SWIZZLE_X; |
||
224 | swizzles[2] = SWIZZLE_X; |
||
225 | swizzles[3] = SWIZZLE_X; |
||
226 | } |
||
227 | break; |
||
228 | case GL_RED: |
||
229 | case GL_RG: |
||
230 | case GL_RGB: |
||
231 | if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0) |
||
232 | swizzles[3] = SWIZZLE_ONE; |
||
233 | break; |
||
234 | } |
||
235 | |||
236 | return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], |
||
237 | swizzles[GET_SWZ(t->_Swizzle, 1)], |
||
238 | swizzles[GET_SWZ(t->_Swizzle, 2)], |
||
239 | swizzles[GET_SWZ(t->_Swizzle, 3)]); |
||
240 | } |
||
241 | |||
242 | static void |
||
243 | gen4_emit_buffer_surface_state(struct brw_context *brw, |
||
244 | uint32_t *out_offset, |
||
245 | drm_intel_bo *bo, |
||
246 | unsigned buffer_offset, |
||
247 | unsigned surface_format, |
||
248 | unsigned buffer_size, |
||
249 | unsigned pitch, |
||
250 | bool rw) |
||
251 | { |
||
252 | uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, |
||
253 | 6 * 4, 32, out_offset); |
||
254 | memset(surf, 0, 6 * 4); |
||
255 | |||
256 | surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | |
||
257 | surface_format << BRW_SURFACE_FORMAT_SHIFT | |
||
258 | (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0); |
||
259 | surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */ |
||
260 | surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT | |
||
261 | ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT; |
||
262 | surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | |
||
263 | (pitch - 1) << BRW_SURFACE_PITCH_SHIFT; |
||
264 | |||
265 | /* Emit relocation to surface contents. The 965 PRM, Volume 4, section |
||
266 | * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate |
||
267 | * physical cache. It is mapped in hardware to the sampler cache." |
||
268 | */ |
||
269 | if (bo) { |
||
270 | drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4, |
||
271 | bo, buffer_offset, |
||
272 | I915_GEM_DOMAIN_SAMPLER, |
||
273 | (rw ? I915_GEM_DOMAIN_SAMPLER : 0)); |
||
274 | } |
||
275 | } |
||
276 | |||
277 | void |
||
278 | brw_update_buffer_texture_surface(struct gl_context *ctx, |
||
279 | unsigned unit, |
||
280 | uint32_t *surf_offset) |
||
281 | { |
||
282 | struct brw_context *brw = brw_context(ctx); |
||
283 | struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; |
||
284 | struct intel_buffer_object *intel_obj = |
||
285 | intel_buffer_object(tObj->BufferObject); |
||
286 | uint32_t size = tObj->BufferSize; |
||
287 | drm_intel_bo *bo = NULL; |
||
288 | mesa_format format = tObj->_BufferObjectFormat; |
||
289 | uint32_t brw_format = brw_format_for_mesa_format(format); |
||
290 | int texel_size = _mesa_get_format_bytes(format); |
||
291 | |||
292 | if (intel_obj) { |
||
293 | size = MIN2(size, intel_obj->Base.Size); |
||
294 | bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size); |
||
295 | } |
||
296 | |||
297 | if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) { |
||
298 | _mesa_problem(NULL, "bad format %s for texture buffer\n", |
||
299 | _mesa_get_format_name(format)); |
||
300 | } |
||
301 | |||
302 | brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo, |
||
303 | tObj->BufferOffset, |
||
304 | brw_format, |
||
305 | size / texel_size, |
||
306 | texel_size, |
||
307 | false /* rw */); |
||
308 | } |
||
309 | |||
310 | static void |
||
311 | brw_update_texture_surface(struct gl_context *ctx, |
||
312 | unsigned unit, |
||
313 | uint32_t *surf_offset, |
||
314 | bool for_gather) |
||
315 | { |
||
316 | struct brw_context *brw = brw_context(ctx); |
||
317 | struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; |
||
318 | struct intel_texture_object *intelObj = intel_texture_object(tObj); |
||
319 | struct intel_mipmap_tree *mt = intelObj->mt; |
||
320 | struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); |
||
321 | uint32_t *surf; |
||
322 | |||
323 | /* BRW_NEW_TEXTURE_BUFFER */ |
||
324 | if (tObj->Target == GL_TEXTURE_BUFFER) { |
||
325 | brw_update_buffer_texture_surface(ctx, unit, surf_offset); |
||
326 | return; |
||
327 | } |
||
328 | |||
329 | surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, |
||
330 | 6 * 4, 32, surf_offset); |
||
331 | |||
332 | uint32_t tex_format = translate_tex_format(brw, mt->format, |
||
333 | sampler->sRGBDecode); |
||
334 | |||
335 | if (for_gather) { |
||
336 | /* Sandybridge's gather4 message is broken for integer formats. |
||
337 | * To work around this, we pretend the surface is UNORM for |
||
338 | * 8 or 16-bit formats, and emit shader instructions to recover |
||
339 | * the real INT/UINT value. For 32-bit formats, we pretend |
||
340 | * the surface is FLOAT, and simply reinterpret the resulting |
||
341 | * bits. |
||
342 | */ |
||
343 | switch (tex_format) { |
||
344 | case BRW_SURFACEFORMAT_R8_SINT: |
||
345 | case BRW_SURFACEFORMAT_R8_UINT: |
||
346 | tex_format = BRW_SURFACEFORMAT_R8_UNORM; |
||
347 | break; |
||
348 | |||
349 | case BRW_SURFACEFORMAT_R16_SINT: |
||
350 | case BRW_SURFACEFORMAT_R16_UINT: |
||
351 | tex_format = BRW_SURFACEFORMAT_R16_UNORM; |
||
352 | break; |
||
353 | |||
354 | case BRW_SURFACEFORMAT_R32_SINT: |
||
355 | case BRW_SURFACEFORMAT_R32_UINT: |
||
356 | tex_format = BRW_SURFACEFORMAT_R32_FLOAT; |
||
357 | break; |
||
358 | |||
359 | default: |
||
360 | break; |
||
361 | } |
||
362 | } |
||
363 | |||
364 | surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | |
||
365 | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | |
||
366 | BRW_SURFACE_CUBEFACE_ENABLES | |
||
367 | tex_format << BRW_SURFACE_FORMAT_SHIFT); |
||
368 | |||
369 | surf[1] = mt->bo->offset64 + mt->offset; /* reloc */ |
||
370 | |||
371 | surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT | |
||
372 | (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT | |
||
373 | (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT); |
||
374 | |||
375 | surf[3] = (brw_get_surface_tiling_bits(mt->tiling) | |
||
376 | (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT | |
||
377 | (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); |
||
378 | |||
379 | surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) | |
||
380 | SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD)); |
||
381 | |||
382 | surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0; |
||
383 | |||
384 | /* Emit relocation to surface contents */ |
||
385 | drm_intel_bo_emit_reloc(brw->batch.bo, |
||
386 | *surf_offset + 4, |
||
387 | mt->bo, |
||
388 | surf[1] - mt->bo->offset64, |
||
389 | I915_GEM_DOMAIN_SAMPLER, 0); |
||
390 | } |
||
391 | |||
392 | /** |
||
393 | * Create the constant buffer surface. Vertex/fragment shader constants will be |
||
394 | * read from this buffer with Data Port Read instructions/messages. |
||
395 | */ |
||
396 | void |
||
397 | brw_create_constant_surface(struct brw_context *brw, |
||
398 | drm_intel_bo *bo, |
||
399 | uint32_t offset, |
||
400 | uint32_t size, |
||
401 | uint32_t *out_offset, |
||
402 | bool dword_pitch) |
||
403 | { |
||
404 | uint32_t stride = dword_pitch ? 4 : 16; |
||
405 | uint32_t elements = ALIGN(size, stride) / stride; |
||
406 | |||
407 | brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset, |
||
408 | BRW_SURFACEFORMAT_R32G32B32A32_FLOAT, |
||
409 | elements, stride, false); |
||
410 | } |
||
411 | |||
412 | /** |
||
413 | * Set up a binding table entry for use by stream output logic (transform |
||
414 | * feedback). |
||
415 | * |
||
416 | * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES. |
||
417 | */ |
||
418 | void |
||
419 | brw_update_sol_surface(struct brw_context *brw, |
||
420 | struct gl_buffer_object *buffer_obj, |
||
421 | uint32_t *out_offset, unsigned num_vector_components, |
||
422 | unsigned stride_dwords, unsigned offset_dwords) |
||
423 | { |
||
424 | struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj); |
||
425 | uint32_t offset_bytes = 4 * offset_dwords; |
||
426 | drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, |
||
427 | offset_bytes, |
||
428 | buffer_obj->Size - offset_bytes); |
||
429 | uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, |
||
430 | out_offset); |
||
431 | uint32_t pitch_minus_1 = 4*stride_dwords - 1; |
||
432 | size_t size_dwords = buffer_obj->Size / 4; |
||
433 | uint32_t buffer_size_minus_1, width, height, depth, surface_format; |
||
434 | |||
435 | /* FIXME: can we rely on core Mesa to ensure that the buffer isn't |
||
436 | * too big to map using a single binding table entry? |
||
437 | */ |
||
438 | assert((size_dwords - offset_dwords) / stride_dwords |
||
439 | <= BRW_MAX_NUM_BUFFER_ENTRIES); |
||
440 | |||
441 | if (size_dwords > offset_dwords + num_vector_components) { |
||
442 | /* There is room for at least 1 transform feedback output in the buffer. |
||
443 | * Compute the number of additional transform feedback outputs the |
||
444 | * buffer has room for. |
||
445 | */ |
||
446 | buffer_size_minus_1 = |
||
447 | (size_dwords - offset_dwords - num_vector_components) / stride_dwords; |
||
448 | } else { |
||
449 | /* There isn't even room for a single transform feedback output in the |
||
450 | * buffer. We can't configure the binding table entry to prevent output |
||
451 | * entirely; we'll have to rely on the geometry shader to detect |
||
452 | * overflow. But to minimize the damage in case of a bug, set up the |
||
453 | * binding table entry to just allow a single output. |
||
454 | */ |
||
455 | buffer_size_minus_1 = 0; |
||
456 | } |
||
457 | width = buffer_size_minus_1 & 0x7f; |
||
458 | height = (buffer_size_minus_1 & 0xfff80) >> 7; |
||
459 | depth = (buffer_size_minus_1 & 0x7f00000) >> 20; |
||
460 | |||
461 | switch (num_vector_components) { |
||
462 | case 1: |
||
463 | surface_format = BRW_SURFACEFORMAT_R32_FLOAT; |
||
464 | break; |
||
465 | case 2: |
||
466 | surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT; |
||
467 | break; |
||
468 | case 3: |
||
469 | surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; |
||
470 | break; |
||
471 | case 4: |
||
472 | surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; |
||
473 | break; |
||
474 | default: |
||
475 | unreachable("Invalid vector size for transform feedback output"); |
||
476 | } |
||
477 | |||
478 | surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | |
||
479 | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | |
||
480 | surface_format << BRW_SURFACE_FORMAT_SHIFT | |
||
481 | BRW_SURFACE_RC_READ_WRITE; |
||
482 | surf[1] = bo->offset64 + offset_bytes; /* reloc */ |
||
483 | surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | |
||
484 | height << BRW_SURFACE_HEIGHT_SHIFT); |
||
485 | surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | |
||
486 | pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); |
||
487 | surf[4] = 0; |
||
488 | surf[5] = 0; |
||
489 | |||
490 | /* Emit relocation to surface contents. */ |
||
491 | drm_intel_bo_emit_reloc(brw->batch.bo, |
||
492 | *out_offset + 4, |
||
493 | bo, offset_bytes, |
||
494 | I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); |
||
495 | } |
||
496 | |||
497 | /* Creates a new WM constant buffer reflecting the current fragment program's |
||
498 | * constants, if needed by the fragment program. |
||
499 | * |
||
500 | * Otherwise, constants go through the CURBEs using the brw_constant_buffer |
||
501 | * state atom. |
||
502 | */ |
||
503 | static void |
||
504 | brw_upload_wm_pull_constants(struct brw_context *brw) |
||
505 | { |
||
506 | struct brw_stage_state *stage_state = &brw->wm.base; |
||
507 | /* BRW_NEW_FRAGMENT_PROGRAM */ |
||
508 | struct brw_fragment_program *fp = |
||
509 | (struct brw_fragment_program *) brw->fragment_program; |
||
510 | /* BRW_NEW_FS_PROG_DATA */ |
||
511 | struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base; |
||
512 | |||
513 | /* _NEW_PROGRAM_CONSTANTS */ |
||
514 | brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base, |
||
515 | stage_state, prog_data, true); |
||
516 | } |
||
517 | |||
518 | const struct brw_tracked_state brw_wm_pull_constants = { |
||
519 | .dirty = { |
||
520 | .mesa = _NEW_PROGRAM_CONSTANTS, |
||
521 | .brw = BRW_NEW_BATCH | |
||
522 | BRW_NEW_FRAGMENT_PROGRAM | |
||
523 | BRW_NEW_FS_PROG_DATA, |
||
524 | }, |
||
525 | .emit = brw_upload_wm_pull_constants, |
||
526 | }; |
||
527 | |||
528 | /** |
||
529 | * Creates a null renderbuffer surface. |
||
530 | * |
||
531 | * This is used when the shader doesn't write to any color output. An FB |
||
532 | * write to target 0 will still be emitted, because that's how the thread is |
||
533 | * terminated (and computed depth is returned), so we need to have the |
||
534 | * hardware discard the target 0 color output.. |
||
535 | */ |
||
536 | static void |
||
537 | brw_emit_null_surface_state(struct brw_context *brw, |
||
538 | unsigned width, |
||
539 | unsigned height, |
||
540 | unsigned samples, |
||
541 | uint32_t *out_offset) |
||
542 | { |
||
543 | /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming |
||
544 | * Notes): |
||
545 | * |
||
546 | * A null surface will be used in instances where an actual surface is |
||
547 | * not bound. When a write message is generated to a null surface, no |
||
548 | * actual surface is written to. When a read message (including any |
||
549 | * sampling engine message) is generated to a null surface, the result |
||
550 | * is all zeros. Note that a null surface type is allowed to be used |
||
551 | * with all messages, even if it is not specificially indicated as |
||
552 | * supported. All of the remaining fields in surface state are ignored |
||
553 | * for null surfaces, with the following exceptions: |
||
554 | * |
||
555 | * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the |
||
556 | * depth buffer’s corresponding state for all render target surfaces, |
||
557 | * including null. |
||
558 | * |
||
559 | * - Surface Format must be R8G8B8A8_UNORM. |
||
560 | */ |
||
561 | unsigned surface_type = BRW_SURFACE_NULL; |
||
562 | drm_intel_bo *bo = NULL; |
||
563 | unsigned pitch_minus_1 = 0; |
||
564 | uint32_t multisampling_state = 0; |
||
565 | uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, |
||
566 | out_offset); |
||
567 | |||
568 | if (samples > 1) { |
||
569 | /* On Gen6, null render targets seem to cause GPU hangs when |
||
570 | * multisampling. So work around this problem by rendering into dummy |
||
571 | * color buffer. |
||
572 | * |
||
573 | * To decrease the amount of memory needed by the workaround buffer, we |
||
574 | * set its pitch to 128 bytes (the width of a Y tile). This means that |
||
575 | * the amount of memory needed for the workaround buffer is |
||
576 | * (width_in_tiles + height_in_tiles - 1) tiles. |
||
577 | * |
||
578 | * Note that since the workaround buffer will be interpreted by the |
||
579 | * hardware as an interleaved multisampled buffer, we need to compute |
||
580 | * width_in_tiles and height_in_tiles by dividing the width and height |
||
581 | * by 16 rather than the normal Y-tile size of 32. |
||
582 | */ |
||
583 | unsigned width_in_tiles = ALIGN(width, 16) / 16; |
||
584 | unsigned height_in_tiles = ALIGN(height, 16) / 16; |
||
585 | unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; |
||
586 | brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo, |
||
587 | size_needed); |
||
588 | bo = brw->wm.multisampled_null_render_target_bo; |
||
589 | surface_type = BRW_SURFACE_2D; |
||
590 | pitch_minus_1 = 127; |
||
591 | multisampling_state = brw_get_surface_num_multisamples(samples); |
||
592 | } |
||
593 | |||
594 | surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT | |
||
595 | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); |
||
596 | if (brw->gen < 6) { |
||
597 | surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT | |
||
598 | 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT | |
||
599 | 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT | |
||
600 | 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); |
||
601 | } |
||
602 | surf[1] = bo ? bo->offset64 : 0; |
||
603 | surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT | |
||
604 | (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); |
||
605 | |||
606 | /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming |
||
607 | * Notes): |
||
608 | * |
||
609 | * If Surface Type is SURFTYPE_NULL, this field must be TRUE |
||
610 | */ |
||
611 | surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | |
||
612 | pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); |
||
613 | surf[4] = multisampling_state; |
||
614 | surf[5] = 0; |
||
615 | |||
616 | if (bo) { |
||
617 | drm_intel_bo_emit_reloc(brw->batch.bo, |
||
618 | *out_offset + 4, |
||
619 | bo, 0, |
||
620 | I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); |
||
621 | } |
||
622 | } |
||
623 | |||
624 | /** |
||
625 | * Sets up a surface state structure to point at the given region. |
||
626 | * While it is only used for the front/back buffer currently, it should be |
||
627 | * usable for further buffers when doing ARB_draw_buffer support. |
||
628 | */ |
||
629 | static uint32_t |
||
630 | brw_update_renderbuffer_surface(struct brw_context *brw, |
||
631 | struct gl_renderbuffer *rb, |
||
632 | bool layered, unsigned unit, |
||
633 | uint32_t surf_index) |
||
634 | { |
||
635 | struct gl_context *ctx = &brw->ctx; |
||
636 | struct intel_renderbuffer *irb = intel_renderbuffer(rb); |
||
637 | struct intel_mipmap_tree *mt = irb->mt; |
||
638 | uint32_t *surf; |
||
639 | uint32_t tile_x, tile_y; |
||
640 | uint32_t format = 0; |
||
641 | uint32_t offset; |
||
642 | /* _NEW_BUFFERS */ |
||
643 | mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); |
||
644 | /* BRW_NEW_FS_PROG_DATA */ |
||
645 | |||
646 | assert(!layered); |
||
647 | |||
648 | if (rb->TexImage && !brw->has_surface_tile_offset) { |
||
649 | intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y); |
||
650 | |||
651 | if (tile_x != 0 || tile_y != 0) { |
||
652 | /* Original gen4 hardware couldn't draw to a non-tile-aligned |
||
653 | * destination in a miptree unless you actually setup your renderbuffer |
||
654 | * as a miptree and used the fragile lod/array_index/etc. controls to |
||
655 | * select the image. So, instead, we just make a new single-level |
||
656 | * miptree and render into that. |
||
657 | */ |
||
658 | intel_renderbuffer_move_to_temp(brw, irb, false); |
||
659 | mt = irb->mt; |
||
660 | } |
||
661 | } |
||
662 | |||
663 | intel_miptree_used_for_rendering(irb->mt); |
||
664 | |||
665 | surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset); |
||
666 | |||
667 | format = brw->render_target_format[rb_format]; |
||
668 | if (unlikely(!brw->format_supported_as_render_target[rb_format])) { |
||
669 | _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", |
||
670 | __func__, _mesa_get_format_name(rb_format)); |
||
671 | } |
||
672 | |||
673 | surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | |
||
674 | format << BRW_SURFACE_FORMAT_SHIFT); |
||
675 | |||
676 | /* reloc */ |
||
677 | assert(mt->offset % mt->cpp == 0); |
||
678 | surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) + |
||
679 | mt->bo->offset64 + mt->offset); |
||
680 | |||
681 | surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | |
||
682 | (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); |
||
683 | |||
684 | surf[3] = (brw_get_surface_tiling_bits(mt->tiling) | |
||
685 | (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); |
||
686 | |||
687 | surf[4] = brw_get_surface_num_multisamples(mt->num_samples); |
||
688 | |||
689 | assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); |
||
690 | /* Note that the low bits of these fields are missing, so |
||
691 | * there's the possibility of getting in trouble. |
||
692 | */ |
||
693 | assert(tile_x % 4 == 0); |
||
694 | assert(tile_y % 2 == 0); |
||
695 | surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | |
||
696 | (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | |
||
697 | (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); |
||
698 | |||
699 | if (brw->gen < 6) { |
||
700 | /* _NEW_COLOR */ |
||
701 | if (!ctx->Color.ColorLogicOpEnabled && |
||
702 | (ctx->Color.BlendEnabled & (1 << unit))) |
||
703 | surf[0] |= BRW_SURFACE_BLEND_ENABLED; |
||
704 | |||
705 | if (!ctx->Color.ColorMask[unit][0]) |
||
706 | surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT; |
||
707 | if (!ctx->Color.ColorMask[unit][1]) |
||
708 | surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT; |
||
709 | if (!ctx->Color.ColorMask[unit][2]) |
||
710 | surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT; |
||
711 | |||
712 | /* As mentioned above, disable writes to the alpha component when the |
||
713 | * renderbuffer is XRGB. |
||
714 | */ |
||
715 | if (ctx->DrawBuffer->Visual.alphaBits == 0 || |
||
716 | !ctx->Color.ColorMask[unit][3]) { |
||
717 | surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT; |
||
718 | } |
||
719 | } |
||
720 | |||
721 | drm_intel_bo_emit_reloc(brw->batch.bo, |
||
722 | offset + 4, |
||
723 | mt->bo, |
||
724 | surf[1] - mt->bo->offset64, |
||
725 | I915_GEM_DOMAIN_RENDER, |
||
726 | I915_GEM_DOMAIN_RENDER); |
||
727 | |||
728 | return offset; |
||
729 | } |
||
730 | |||
731 | /** |
||
732 | * Construct SURFACE_STATE objects for renderbuffers/draw buffers. |
||
733 | */ |
||
734 | void |
||
735 | brw_update_renderbuffer_surfaces(struct brw_context *brw, |
||
736 | const struct gl_framebuffer *fb, |
||
737 | uint32_t render_target_start, |
||
738 | uint32_t *surf_offset) |
||
739 | { |
||
740 | GLuint i; |
||
741 | |||
742 | /* Update surfaces for drawing buffers */ |
||
743 | if (fb->_NumColorDrawBuffers >= 1) { |
||
744 | for (i = 0; i < fb->_NumColorDrawBuffers; i++) { |
||
745 | const uint32_t surf_index = render_target_start + i; |
||
746 | |||
747 | if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) { |
||
748 | surf_offset[surf_index] = |
||
749 | brw->vtbl.update_renderbuffer_surface( |
||
750 | brw, fb->_ColorDrawBuffers[i], |
||
751 | fb->MaxNumLayers > 0, i, surf_index); |
||
752 | } else { |
||
753 | brw->vtbl.emit_null_surface_state( |
||
754 | brw, fb->Width, fb->Height, fb->Visual.samples, |
||
755 | &surf_offset[surf_index]); |
||
756 | } |
||
757 | } |
||
758 | } else { |
||
759 | const uint32_t surf_index = render_target_start; |
||
760 | brw->vtbl.emit_null_surface_state( |
||
761 | brw, fb->Width, fb->Height, fb->Visual.samples, |
||
762 | &surf_offset[surf_index]); |
||
763 | } |
||
764 | } |
||
765 | |||
766 | static void |
||
767 | update_renderbuffer_surfaces(struct brw_context *brw) |
||
768 | { |
||
769 | const struct gl_context *ctx = &brw->ctx; |
||
770 | |||
771 | /* _NEW_BUFFERS | _NEW_COLOR */ |
||
772 | const struct gl_framebuffer *fb = ctx->DrawBuffer; |
||
773 | brw_update_renderbuffer_surfaces( |
||
774 | brw, fb, |
||
775 | brw->wm.prog_data->binding_table.render_target_start, |
||
776 | brw->wm.base.surf_offset); |
||
777 | brw->ctx.NewDriverState |= BRW_NEW_SURFACES; |
||
778 | } |
||
779 | |||
780 | const struct brw_tracked_state brw_renderbuffer_surfaces = { |
||
781 | .dirty = { |
||
782 | .mesa = _NEW_BUFFERS | |
||
783 | _NEW_COLOR, |
||
784 | .brw = BRW_NEW_BATCH | |
||
785 | BRW_NEW_FS_PROG_DATA, |
||
786 | }, |
||
787 | .emit = update_renderbuffer_surfaces, |
||
788 | }; |
||
789 | |||
790 | const struct brw_tracked_state gen6_renderbuffer_surfaces = { |
||
791 | .dirty = { |
||
792 | .mesa = _NEW_BUFFERS, |
||
793 | .brw = BRW_NEW_BATCH, |
||
794 | }, |
||
795 | .emit = update_renderbuffer_surfaces, |
||
796 | }; |
||
797 | |||
798 | |||
799 | static void |
||
800 | update_stage_texture_surfaces(struct brw_context *brw, |
||
801 | const struct gl_program *prog, |
||
802 | struct brw_stage_state *stage_state, |
||
803 | bool for_gather) |
||
804 | { |
||
805 | if (!prog) |
||
806 | return; |
||
807 | |||
808 | struct gl_context *ctx = &brw->ctx; |
||
809 | |||
810 | uint32_t *surf_offset = stage_state->surf_offset; |
||
811 | |||
812 | /* BRW_NEW_*_PROG_DATA */ |
||
813 | if (for_gather) |
||
814 | surf_offset += stage_state->prog_data->binding_table.gather_texture_start; |
||
815 | else |
||
816 | surf_offset += stage_state->prog_data->binding_table.texture_start; |
||
817 | |||
818 | unsigned num_samplers = _mesa_fls(prog->SamplersUsed); |
||
819 | for (unsigned s = 0; s < num_samplers; s++) { |
||
820 | surf_offset[s] = 0; |
||
821 | |||
822 | if (prog->SamplersUsed & (1 << s)) { |
||
823 | const unsigned unit = prog->SamplerUnits[s]; |
||
824 | |||
825 | /* _NEW_TEXTURE */ |
||
826 | if (ctx->Texture.Unit[unit]._Current) { |
||
827 | brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather); |
||
828 | } |
||
829 | } |
||
830 | } |
||
831 | } |
||
832 | |||
833 | |||
834 | /** |
||
835 | * Construct SURFACE_STATE objects for enabled textures. |
||
836 | */ |
||
837 | static void |
||
838 | brw_update_texture_surfaces(struct brw_context *brw) |
||
839 | { |
||
840 | /* BRW_NEW_VERTEX_PROGRAM */ |
||
841 | struct gl_program *vs = (struct gl_program *) brw->vertex_program; |
||
842 | |||
843 | /* BRW_NEW_GEOMETRY_PROGRAM */ |
||
844 | struct gl_program *gs = (struct gl_program *) brw->geometry_program; |
||
845 | |||
846 | /* BRW_NEW_FRAGMENT_PROGRAM */ |
||
847 | struct gl_program *fs = (struct gl_program *) brw->fragment_program; |
||
848 | |||
849 | /* _NEW_TEXTURE */ |
||
850 | update_stage_texture_surfaces(brw, vs, &brw->vs.base, false); |
||
851 | update_stage_texture_surfaces(brw, gs, &brw->gs.base, false); |
||
852 | update_stage_texture_surfaces(brw, fs, &brw->wm.base, false); |
||
853 | |||
854 | /* emit alternate set of surface state for gather. this |
||
855 | * allows the surface format to be overriden for only the |
||
856 | * gather4 messages. */ |
||
857 | if (brw->gen < 8) { |
||
858 | if (vs && vs->UsesGather) |
||
859 | update_stage_texture_surfaces(brw, vs, &brw->vs.base, true); |
||
860 | if (gs && gs->UsesGather) |
||
861 | update_stage_texture_surfaces(brw, gs, &brw->gs.base, true); |
||
862 | if (fs && fs->UsesGather) |
||
863 | update_stage_texture_surfaces(brw, fs, &brw->wm.base, true); |
||
864 | } |
||
865 | |||
866 | brw->ctx.NewDriverState |= BRW_NEW_SURFACES; |
||
867 | } |
||
868 | |||
869 | const struct brw_tracked_state brw_texture_surfaces = { |
||
870 | .dirty = { |
||
871 | .mesa = _NEW_TEXTURE, |
||
872 | .brw = BRW_NEW_BATCH | |
||
873 | BRW_NEW_FRAGMENT_PROGRAM | |
||
874 | BRW_NEW_FS_PROG_DATA | |
||
875 | BRW_NEW_GEOMETRY_PROGRAM | |
||
876 | BRW_NEW_GS_PROG_DATA | |
||
877 | BRW_NEW_TEXTURE_BUFFER | |
||
878 | BRW_NEW_VERTEX_PROGRAM | |
||
879 | BRW_NEW_VS_PROG_DATA, |
||
880 | }, |
||
881 | .emit = brw_update_texture_surfaces, |
||
882 | }; |
||
883 | |||
884 | void |
||
885 | brw_upload_ubo_surfaces(struct brw_context *brw, |
||
886 | struct gl_shader *shader, |
||
887 | struct brw_stage_state *stage_state, |
||
888 | struct brw_stage_prog_data *prog_data, |
||
889 | bool dword_pitch) |
||
890 | { |
||
891 | struct gl_context *ctx = &brw->ctx; |
||
892 | |||
893 | if (!shader) |
||
894 | return; |
||
895 | |||
896 | uint32_t *surf_offsets = |
||
897 | &stage_state->surf_offset[prog_data->binding_table.ubo_start]; |
||
898 | |||
899 | for (int i = 0; i < shader->NumUniformBlocks; i++) { |
||
900 | struct gl_uniform_buffer_binding *binding; |
||
901 | struct intel_buffer_object *intel_bo; |
||
902 | |||
903 | binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding]; |
||
904 | intel_bo = intel_buffer_object(binding->BufferObject); |
||
905 | drm_intel_bo *bo = |
||
906 | intel_bufferobj_buffer(brw, intel_bo, |
||
907 | binding->Offset, |
||
908 | binding->BufferObject->Size - binding->Offset); |
||
909 | |||
910 | /* Because behavior for referencing outside of the binding's size in the |
||
911 | * glBindBufferRange case is undefined, we can just bind the whole buffer |
||
912 | * glBindBufferBase wants and be a correct implementation. |
||
913 | */ |
||
914 | brw_create_constant_surface(brw, bo, binding->Offset, |
||
915 | bo->size - binding->Offset, |
||
916 | &surf_offsets[i], |
||
917 | dword_pitch); |
||
918 | } |
||
919 | |||
920 | if (shader->NumUniformBlocks) |
||
921 | brw->ctx.NewDriverState |= BRW_NEW_SURFACES; |
||
922 | } |
||
923 | |||
924 | static void |
||
925 | brw_upload_wm_ubo_surfaces(struct brw_context *brw) |
||
926 | { |
||
927 | struct gl_context *ctx = &brw->ctx; |
||
928 | /* _NEW_PROGRAM */ |
||
929 | struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram; |
||
930 | |||
931 | if (!prog) |
||
932 | return; |
||
933 | |||
934 | /* BRW_NEW_FS_PROG_DATA */ |
||
935 | brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], |
||
936 | &brw->wm.base, &brw->wm.prog_data->base, true); |
||
937 | } |
||
938 | |||
939 | const struct brw_tracked_state brw_wm_ubo_surfaces = { |
||
940 | .dirty = { |
||
941 | .mesa = _NEW_PROGRAM, |
||
942 | .brw = BRW_NEW_BATCH | |
||
943 | BRW_NEW_FS_PROG_DATA | |
||
944 | BRW_NEW_UNIFORM_BUFFER, |
||
945 | }, |
||
946 | .emit = brw_upload_wm_ubo_surfaces, |
||
947 | }; |
||
948 | |||
949 | void |
||
950 | brw_upload_abo_surfaces(struct brw_context *brw, |
||
951 | struct gl_shader_program *prog, |
||
952 | struct brw_stage_state *stage_state, |
||
953 | struct brw_stage_prog_data *prog_data) |
||
954 | { |
||
955 | struct gl_context *ctx = &brw->ctx; |
||
956 | uint32_t *surf_offsets = |
||
957 | &stage_state->surf_offset[prog_data->binding_table.abo_start]; |
||
958 | |||
959 | for (int i = 0; i < prog->NumAtomicBuffers; i++) { |
||
960 | struct gl_atomic_buffer_binding *binding = |
||
961 | &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding]; |
||
962 | struct intel_buffer_object *intel_bo = |
||
963 | intel_buffer_object(binding->BufferObject); |
||
964 | drm_intel_bo *bo = intel_bufferobj_buffer( |
||
965 | brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); |
||
966 | |||
967 | brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, |
||
968 | binding->Offset, BRW_SURFACEFORMAT_RAW, |
||
969 | bo->size - binding->Offset, 1, true); |
||
970 | } |
||
971 | |||
972 | if (prog->NumAtomicBuffers) |
||
973 | brw->ctx.NewDriverState |= BRW_NEW_SURFACES; |
||
974 | } |
||
975 | |||
976 | static void |
||
977 | brw_upload_wm_abo_surfaces(struct brw_context *brw) |
||
978 | { |
||
979 | struct gl_context *ctx = &brw->ctx; |
||
980 | /* _NEW_PROGRAM */ |
||
981 | struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; |
||
982 | |||
983 | if (prog) { |
||
984 | /* BRW_NEW_FS_PROG_DATA */ |
||
985 | brw_upload_abo_surfaces(brw, prog, &brw->wm.base, |
||
986 | &brw->wm.prog_data->base); |
||
987 | } |
||
988 | } |
||
989 | |||
990 | const struct brw_tracked_state brw_wm_abo_surfaces = { |
||
991 | .dirty = { |
||
992 | .mesa = _NEW_PROGRAM, |
||
993 | .brw = BRW_NEW_ATOMIC_BUFFER | |
||
994 | BRW_NEW_BATCH | |
||
995 | BRW_NEW_FS_PROG_DATA, |
||
996 | }, |
||
997 | .emit = brw_upload_wm_abo_surfaces, |
||
998 | }; |
||
999 | |||
1000 | static void |
||
1001 | brw_upload_cs_abo_surfaces(struct brw_context *brw) |
||
1002 | { |
||
1003 | struct gl_context *ctx = &brw->ctx; |
||
1004 | /* _NEW_PROGRAM */ |
||
1005 | struct gl_shader_program *prog = |
||
1006 | ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; |
||
1007 | |||
1008 | if (prog) { |
||
1009 | /* BRW_NEW_CS_PROG_DATA */ |
||
1010 | brw_upload_abo_surfaces(brw, prog, &brw->cs.base, |
||
1011 | &brw->cs.prog_data->base); |
||
1012 | } |
||
1013 | } |
||
1014 | |||
1015 | const struct brw_tracked_state brw_cs_abo_surfaces = { |
||
1016 | .dirty = { |
||
1017 | .mesa = _NEW_PROGRAM, |
||
1018 | .brw = BRW_NEW_ATOMIC_BUFFER | |
||
1019 | BRW_NEW_BATCH | |
||
1020 | BRW_NEW_CS_PROG_DATA, |
||
1021 | }, |
||
1022 | .emit = brw_upload_cs_abo_surfaces, |
||
1023 | }; |
||
1024 | |||
1025 | void |
||
1026 | gen4_init_vtable_surface_functions(struct brw_context *brw) |
||
1027 | { |
||
1028 | brw->vtbl.update_texture_surface = brw_update_texture_surface; |
||
1029 | brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface; |
||
1030 | brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state; |
||
1031 | brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state; |
||
1032 | }>>>><>>>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>=>><>><>><>><>><>><>><>><>><>><>><>><>><>><> |