/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell
  */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
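
/* Editorial worked example (not from the original source): for a depth
 * texture with DepthMode == GL_LUMINANCE and the identity application
 * swizzle (GET_SWZ(t->_Swizzle, i) == i), the table above becomes
 * {X, X, X, ONE}, so this returns
 * MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE): depth
 * reads replicate into R/G/B and alpha samples as 1.0.
 */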

static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *binding_table,
                                  unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   uint32_t *surf;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
   gl_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              (brw_format << BRW_SURFACE_FORMAT_SHIFT));

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   if (bo) {
      surf[1] = bo->offset; /* reloc */

      /* Emit relocation to surface contents. */
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              binding_table[surf_index] + 4,
                              bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);

      int w = intel_obj->Base.Size / texel_size;
      surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
                 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
      surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
                 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
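
      /* Editorial worked example (assumption, not in the original
       * comments): the element count w is split across three SURFACE_STATE
       * bitfields -- bits 6:0 go in Width, bits 19:7 in Height, and bits
       * 26:20 in Depth, for a limit of 2^27 texels.  E.g. w == 0x123456
       * packs as width 0x56, height 0x468, depth 0x1.
       */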
   } else {
      surf[1] = 0;
      surf[2] = 0;
      surf[3] = 0;
   }

   surf[4] = 0;
   surf[5] = 0;
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *binding_table,
                           unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;
   uint32_t tile_x, tile_y;

   /* BRW_NEW_UNIFORM_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
   surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
                                             &tile_x, &tile_y);

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) <<
              BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           binding_table[surf_index] + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, out_offset);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   surf[1] = bo->offset + offset; /* reloc */

   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
              ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
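
/* Editorial worked example (not from the original source): for size == 64
 * bytes and dword_pitch == false, stride is 16, elements is 4 and w == 3,
 * i.e. four RGBA32F texels at a 16-byte pitch.  With dword_pitch == true
 * the same buffer is described as 16 elements at a 4-byte pitch.
 */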

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   uint32_t offset_bytes = 4 * offset_dwords;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
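
   /* Editorial worked example (not from the original source): capturing one
    * vec4 per vertex (num_vector_components == 4, stride_dwords == 4,
    * offset_dwords == 0) into a 1024-byte buffer gives size_dwords == 256
    * and buffer_size_minus_1 == (256 - 0 - 4) / 4 == 63, i.e. room for 64
    * outputs.
    */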
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
   float *constants;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.const_bo) {
         drm_intel_bo_unreference(brw->wm.const_bo);
         brw->wm.const_bo = NULL;
         brw->wm.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.const_bo);
   brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                         size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
   constants = brw->wm.const_bo->virtual;
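   /* Editorial note: pull_param[] is an array of pointers into the
    * just-refreshed parameter storage, so each constant is gathered with a
    * single dereference as it is copied into the GTT-mapped buffer.
    */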
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
                                     &brw->wm.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
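      /* Editorial worked example (not from the original source): a 256x256
       * multisampled framebuffer gives width_in_tiles == height_in_tiles ==
       * 16, so the scratch buffer needs only (16 + 16 - 1) * 4096 bytes,
       * about 124 KB, instead of a full-size dummy render target.
       */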
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.surf_offset[unit] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.surf_offset[unit] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
                                                  ctx->DrawBuffer->_ColorDrawBuffers[i],
                                                  ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
    * Unfortunately, we're stuck using the gl_program structs until the
    * ARB_fragment_program front-end gets converted to GLSL IR.  These
    * have the downside that SamplerUnits is split and only contains the
    * mappings for samplers active in that stage.
    */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);

   for (unsigned s = 0; s < num_samplers; s++) {
      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;

      if (vs->SamplersUsed & (1 << s)) {
         const unsigned unit = vs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->vs.surf_offset,
                                             SURF_INDEX_VS_TEXTURE(s));
         }
      }

      if (fs->SamplersUsed & (1 << s)) {
         const unsigned unit = fs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->wm.surf_offset,
                                             SURF_INDEX_TEXTURE(s));
         }
      }
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        uint32_t *surf_offsets)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * (as glBindBufferBase would) and still be a correct implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

/**
 * Constructs the binding table for the WM surface state, which maps unit
 * numbers to surface state objects.
 */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
   uint32_t *bind;
   int i;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      gen7_create_shader_time_surface(brw,
                                      &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
   }

   /* Might want to calculate nr_surfaces first, to avoid taking up so much
    * space for the binding table.
    */
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
                          32, &brw->wm.bind_bo_offset);

   /* BRW_NEW_SURFACES */
   for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
      bind[i] = brw->wm.surf_offset[i];
   }

   brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}

const struct brw_tracked_state brw_wm_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_SURFACES),
      .cache = 0
   },
   .emit = brw_upload_wm_binding_table,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}
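
/* Editorial usage sketch (illustrative, based on calls already present in
 * this file): once the vtable is populated, generation-independent state
 * atoms can emit surface state without checking brw->gen, e.g.:
 *
 *    gen4_init_vtable_surface_functions(brw);
 *    brw->vtbl.update_texture_surface(ctx, unit, brw->wm.surf_offset,
 *                                     SURF_INDEX_TEXTURE(s));
 *
 * Later hardware generations can install their own implementations behind
 * the same function pointers.
 */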