/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell
 */

#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}
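
/**
 * Translate an I915_TILING_* value into the tiling bits of SURFACE_STATE
 * DWORD 3.  Linear (untiled) surfaces yield 0.
 */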
uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}
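
/**
 * Encode the multisample count field of SURFACE_STATE.  This pre-Gen7
 * path only distinguishes singlesampled from multisampled surfaces, so
 * any sample count greater than one is encoded as MULTISAMPLECOUNT_4.
 */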
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }
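
   /* With an identity EXT_texture_swizzle state, GET_SWZ(t->_Swizzle, i)
    * is simply i, so e.g. a depth texture with DepthMode GL_LUMINANCE
    * yields MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE):
    * the depth value broadcast to RGB, with alpha reading 1.0.
    */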
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
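
/**
 * Set up a SURFACE_STATE buffer surface for a GL_TEXTURE_BUFFER texture.
 * The element count is spread across the width (7 bits), height (13 bits)
 * and depth (7 bits) fields, and the pitch field holds the texel size.
 */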
192
 
193
 
194
static void
195
brw_update_buffer_texture_surface(struct gl_context *ctx,
196
                                  unsigned unit,
197
                                  uint32_t *binding_table,
198
                                  unsigned surf_index)
199
{
200
   struct brw_context *brw = brw_context(ctx);
201
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
202
   uint32_t *surf;
203
   struct intel_buffer_object *intel_obj =
204
      intel_buffer_object(tObj->BufferObject);
205
   drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
206
   gl_format format = tObj->_BufferObjectFormat;
207
   uint32_t brw_format = brw_format_for_mesa_format(format);
208
   int texel_size = _mesa_get_format_bytes(format);
209
 
210
   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
211
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
212
		    _mesa_get_format_name(format));
213
   }
214
 
215
   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
216
			  6 * 4, 32, &binding_table[surf_index]);
217
 
218
   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
219
	      (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
220
 
221
   if (brw->gen >= 6)
222
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;
223
 
224
   if (bo) {
225
      surf[1] = bo->offset; /* reloc */
226
 
227
      /* Emit relocation to surface contents. */
228
      drm_intel_bo_emit_reloc(brw->batch.bo,
229
			      binding_table[surf_index] + 4,
230
			      bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
231
 
232
      int w = intel_obj->Base.Size / texel_size;
233
      surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
234
		 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
235
      surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
236
		 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
237
   } else {
238
      surf[1] = 0;
239
      surf[2] = 0;
240
      surf[3] = 0;
241
   }
242
 
243
   surf[4] = 0;
244
   surf[5] = 0;
245
}
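
/**
 * Construct SURFACE_STATE for a texture image.  Buffer textures are
 * handed off to brw_update_buffer_texture_surface() instead, since they
 * use the buffer surface layout rather than a miptree.
 */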
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *binding_table,
                           unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;
   uint32_t tile_x, tile_y;

   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
   surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
                                             &tile_x, &tile_y);

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           binding_table[surf_index] + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, out_offset);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   surf[1] = bo->offset + offset; /* reloc */

   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
              ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
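
/*
 * A note on dword_pitch: when set, the constant buffer is laid out as
 * packed dwords (stride 4) rather than vec4 elements (stride 16).  In
 * this file the WM pull-constant path and fragment-shader UBO surfaces
 * request dword pitch, presumably to match the per-channel reads the
 * fragment backend issues; vertex-stage UBO surfaces keep the 16-byte
 * vec4 stride.
 */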

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   uint32_t offset_bytes = 4 * offset_dwords;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
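   /* For example, buffer_size_minus_1 = 999 (0x3e7) packs as
    * width = 0x67 (103), height = 7, depth = 0.
    */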

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
   float *constants;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.const_bo) {
         drm_intel_bo_unreference(brw->wm.const_bo);
         brw->wm.const_bo = NULL;
         brw->wm.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.const_bo);
   brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                         size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
   constants = brw->wm.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
                                     &brw->wm.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
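      /* For a 1920x1080 multisampled framebuffer this comes to
       * (120 + 68 - 1) * 4096 bytes, i.e. about 748 kB, rather than a
       * full-size dummy render target.
       */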
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.surf_offset[unit] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }
#if 0
   printf("brw_update_renderbuffer_surface\n"
          "bind bo(handle=%d format=%d width=%d height=%d\n"
          "pitch=%d, tiling=%d\n"
          "ss[0] %x ss[1] %x ss[2] %x ss[3] %x ss[4] %x ss[5] %x\n",
          region->bo->handle, format, rb->Width, rb->Height,
          region->pitch, region->tiling,
          surf[0], surf[1], surf[2], surf[3], surf[4], surf[5]);
#endif

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.surf_offset[unit] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
                                                  ctx->DrawBuffer->_ColorDrawBuffers[i],
                                                  ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
    * Unfortunately, we're stuck using the gl_program structs until the
    * ARB_fragment_program front-end gets converted to GLSL IR.  These
    * have the downside that SamplerUnits is split and only contains the
    * mappings for samplers active in that stage.
    */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);

   for (unsigned s = 0; s < num_samplers; s++) {
      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;

      if (vs->SamplersUsed & (1 << s)) {
         const unsigned unit = vs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->vs.surf_offset,
                                             SURF_INDEX_VS_TEXTURE(s));
         }
      }

      if (fs->SamplersUsed & (1 << s)) {
         const unsigned unit = fs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->wm.surf_offset,
                                             SURF_INDEX_TEXTURE(s));
         }
      }
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};
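
/**
 * Create constant buffer surfaces for the uniform blocks referenced by a
 * linked shader stage, one binding table entry per block.
 */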
void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        uint32_t *surf_offsets)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);

      /* Because the behavior of references outside the binding's size is
       * undefined in the glBindBufferRange case, we can simply bind the
       * whole buffer (as glBindBufferBase wants) and still be a correct
       * implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
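
/**
 * Upload UBO surfaces for the currently linked fragment program; this is
 * the fragment-stage wrapper around brw_upload_ubo_surfaces().
 */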
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

/**
 * Constructs the binding table for the WM surface state, which maps unit
 * numbers to surface state objects.
 */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
   uint32_t *bind;
   int i;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      gen7_create_shader_time_surface(brw,
                                      &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
   }

   /* Might want to calculate nr_surfaces first, to avoid taking up so much
    * space for the binding table.
    */
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
                          32, &brw->wm.bind_bo_offset);

   /* BRW_NEW_SURFACES */
   for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
      bind[i] = brw->wm.surf_offset[i];
   }

   brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}

const struct brw_tracked_state brw_wm_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_SURFACES),
      .cache = 0
   },
   .emit = brw_upload_wm_binding_table,
};
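
/**
 * Hook up the gen4-6 surface state functions in the context vtable, so
 * shared state upload code can emit surfaces without checking the
 * hardware generation; gen7 code installs its own variants elsewhere.
 */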
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}