Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3
 Intel funded Tungsten Graphics to
4
 develop this 3D driver.
5
 
6
 Permission is hereby granted, free of charge, to any person obtaining
7
 a copy of this software and associated documentation files (the
8
 "Software"), to deal in the Software without restriction, including
9
 without limitation the rights to use, copy, modify, merge, publish,
10
 distribute, sublicense, and/or sell copies of the Software, and to
11
 permit persons to whom the Software is furnished to do so, subject to
12
 the following conditions:
13
 
14
 The above copyright notice and this permission notice (including the
15
 next paragraph) shall be included in all copies or substantial
16
 portions of the Software.
17
 
18
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 
26
 **********************************************************************/
27
 /*
28
  * Authors:
29
  *   Keith Whitwell 
30
  */
31
 
32
 
33
#include "main/context.h"
34
#include "main/blend.h"
35
#include "main/mtypes.h"
36
#include "main/samplerobj.h"
37
#include "program/prog_parameter.h"
38
 
39
#include "intel_mipmap_tree.h"
40
#include "intel_batchbuffer.h"
41
#include "intel_tex.h"
42
#include "intel_fbo.h"
43
#include "intel_buffer_objects.h"
44
 
45
#include "brw_context.h"
46
#include "brw_state.h"
47
#include "brw_defines.h"
48
#include "brw_wm.h"
49
 
50
GLuint
51
translate_tex_target(GLenum target)
52
{
53
   switch (target) {
54
   case GL_TEXTURE_1D:
55
   case GL_TEXTURE_1D_ARRAY_EXT:
56
      return BRW_SURFACE_1D;
57
 
58
   case GL_TEXTURE_RECTANGLE_NV:
59
      return BRW_SURFACE_2D;
60
 
61
   case GL_TEXTURE_2D:
62
   case GL_TEXTURE_2D_ARRAY_EXT:
63
   case GL_TEXTURE_EXTERNAL_OES:
64
   case GL_TEXTURE_2D_MULTISAMPLE:
65
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66
      return BRW_SURFACE_2D;
67
 
68
   case GL_TEXTURE_3D:
69
      return BRW_SURFACE_3D;
70
 
71
   case GL_TEXTURE_CUBE_MAP:
72
   case GL_TEXTURE_CUBE_MAP_ARRAY:
73
      return BRW_SURFACE_CUBE;
74
 
75
   default:
76
      unreachable("not reached");
77
   }
78
}
79
 
80
uint32_t
81
brw_get_surface_tiling_bits(uint32_t tiling)
82
{
83
   switch (tiling) {
84
   case I915_TILING_X:
85
      return BRW_SURFACE_TILED;
86
   case I915_TILING_Y:
87
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
88
   default:
89
      return 0;
90
   }
91
}
92
 
93
 
94
uint32_t
95
brw_get_surface_num_multisamples(unsigned num_samples)
96
{
97
   if (num_samples > 1)
98
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
99
   else
100
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
101
}
102
 
103
void
104
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
105
                      bool is_render_target,
106
                      unsigned *width, unsigned *height,
107
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
108
{
109
   static const unsigned halign_stencil = 8;
110
 
111
   /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
112
    * there are half as many rows.
113
    * In addition, mip-levels are accessed manually by the program and
114
    * therefore the surface is setup to cover all the mip-levels for one slice.
115
    * (Hardware is still used to access individual slices).
116
    */
117
   *tiling = I915_TILING_Y;
118
   *pitch = mt->pitch * 2;
119
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
120
   *height = (mt->total_height / mt->physical_depth0) / 2;
121
 
122
   if (is_render_target) {
123
      *format = BRW_SURFACEFORMAT_R8_UINT;
124
   }
125
}
126
 
127
 
128
/**
129
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
130
 * swizzling.
131
 */
132
int
133
brw_get_texture_swizzle(const struct gl_context *ctx,
134
                        const struct gl_texture_object *t)
135
{
136
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
137
 
138
   int swizzles[SWIZZLE_NIL + 1] = {
139
      SWIZZLE_X,
140
      SWIZZLE_Y,
141
      SWIZZLE_Z,
142
      SWIZZLE_W,
143
      SWIZZLE_ZERO,
144
      SWIZZLE_ONE,
145
      SWIZZLE_NIL
146
   };
147
 
148
   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
149
       img->_BaseFormat == GL_DEPTH_STENCIL) {
150
      GLenum depth_mode = t->DepthMode;
151
 
152
      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153
       * with depth component data specified with a sized internal format.
154
       * Otherwise, it's left at the old default, GL_LUMINANCE.
155
       */
156
      if (_mesa_is_gles3(ctx) &&
157
          img->InternalFormat != GL_DEPTH_COMPONENT &&
158
          img->InternalFormat != GL_DEPTH_STENCIL) {
159
         depth_mode = GL_RED;
160
      }
161
 
162
      switch (depth_mode) {
163
      case GL_ALPHA:
164
         swizzles[0] = SWIZZLE_ZERO;
165
         swizzles[1] = SWIZZLE_ZERO;
166
         swizzles[2] = SWIZZLE_ZERO;
167
         swizzles[3] = SWIZZLE_X;
168
         break;
169
      case GL_LUMINANCE:
170
         swizzles[0] = SWIZZLE_X;
171
         swizzles[1] = SWIZZLE_X;
172
         swizzles[2] = SWIZZLE_X;
173
         swizzles[3] = SWIZZLE_ONE;
174
         break;
175
      case GL_INTENSITY:
176
         swizzles[0] = SWIZZLE_X;
177
         swizzles[1] = SWIZZLE_X;
178
         swizzles[2] = SWIZZLE_X;
179
         swizzles[3] = SWIZZLE_X;
180
         break;
181
      case GL_RED:
182
         swizzles[0] = SWIZZLE_X;
183
         swizzles[1] = SWIZZLE_ZERO;
184
         swizzles[2] = SWIZZLE_ZERO;
185
         swizzles[3] = SWIZZLE_ONE;
186
         break;
187
      }
188
   }
189
 
190
   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
191
 
192
   /* If the texture's format is alpha-only, force R, G, and B to
193
    * 0.0. Similarly, if the texture's format has no alpha channel,
194
    * force the alpha value read to 1.0. This allows for the
195
    * implementation to use an RGBA texture for any of these formats
196
    * without leaking any unexpected values.
197
    */
198
   switch (img->_BaseFormat) {
199
   case GL_ALPHA:
200
      swizzles[0] = SWIZZLE_ZERO;
201
      swizzles[1] = SWIZZLE_ZERO;
202
      swizzles[2] = SWIZZLE_ZERO;
203
      break;
204
   case GL_LUMINANCE:
205
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
206
         swizzles[0] = SWIZZLE_X;
207
         swizzles[1] = SWIZZLE_X;
208
         swizzles[2] = SWIZZLE_X;
209
         swizzles[3] = SWIZZLE_ONE;
210
      }
211
      break;
212
   case GL_LUMINANCE_ALPHA:
213
      if (datatype == GL_SIGNED_NORMALIZED) {
214
         swizzles[0] = SWIZZLE_X;
215
         swizzles[1] = SWIZZLE_X;
216
         swizzles[2] = SWIZZLE_X;
217
         swizzles[3] = SWIZZLE_W;
218
      }
219
      break;
220
   case GL_INTENSITY:
221
      if (datatype == GL_SIGNED_NORMALIZED) {
222
         swizzles[0] = SWIZZLE_X;
223
         swizzles[1] = SWIZZLE_X;
224
         swizzles[2] = SWIZZLE_X;
225
         swizzles[3] = SWIZZLE_X;
226
      }
227
      break;
228
   case GL_RED:
229
   case GL_RG:
230
   case GL_RGB:
231
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
232
         swizzles[3] = SWIZZLE_ONE;
233
      break;
234
   }
235
 
236
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
237
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
238
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
239
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
240
}
241
 
242
static void
243
gen4_emit_buffer_surface_state(struct brw_context *brw,
244
                               uint32_t *out_offset,
245
                               drm_intel_bo *bo,
246
                               unsigned buffer_offset,
247
                               unsigned surface_format,
248
                               unsigned buffer_size,
249
                               unsigned pitch,
250
                               bool rw)
251
{
252
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
253
                                    6 * 4, 32, out_offset);
254
   memset(surf, 0, 6 * 4);
255
 
256
   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
257
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
258
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
259
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
260
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
261
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
262
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
263
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
264
 
265
   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
266
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
267
    * physical cache.  It is mapped in hardware to the sampler cache."
268
    */
269
   if (bo) {
270
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
271
                              bo, buffer_offset,
272
                              I915_GEM_DOMAIN_SAMPLER,
273
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
274
   }
275
}
276
 
277
void
278
brw_update_buffer_texture_surface(struct gl_context *ctx,
279
                                  unsigned unit,
280
                                  uint32_t *surf_offset)
281
{
282
   struct brw_context *brw = brw_context(ctx);
283
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
284
   struct intel_buffer_object *intel_obj =
285
      intel_buffer_object(tObj->BufferObject);
286
   uint32_t size = tObj->BufferSize;
287
   drm_intel_bo *bo = NULL;
288
   mesa_format format = tObj->_BufferObjectFormat;
289
   uint32_t brw_format = brw_format_for_mesa_format(format);
290
   int texel_size = _mesa_get_format_bytes(format);
291
 
292
   if (intel_obj) {
293
      size = MIN2(size, intel_obj->Base.Size);
294
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
295
   }
296
 
297
   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
298
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
299
		    _mesa_get_format_name(format));
300
   }
301
 
302
   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
303
                                       tObj->BufferOffset,
304
                                       brw_format,
305
                                       size / texel_size,
306
                                       texel_size,
307
                                       false /* rw */);
308
}
309
 
310
static void
311
brw_update_texture_surface(struct gl_context *ctx,
312
                           unsigned unit,
313
                           uint32_t *surf_offset,
314
                           bool for_gather)
315
{
316
   struct brw_context *brw = brw_context(ctx);
317
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
318
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
319
   struct intel_mipmap_tree *mt = intelObj->mt;
320
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
321
   uint32_t *surf;
322
 
323
   /* BRW_NEW_TEXTURE_BUFFER */
324
   if (tObj->Target == GL_TEXTURE_BUFFER) {
325
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
326
      return;
327
   }
328
 
329
   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
330
			  6 * 4, 32, surf_offset);
331
 
332
   uint32_t tex_format = translate_tex_format(brw, mt->format,
333
                                              sampler->sRGBDecode);
334
 
335
   if (for_gather) {
336
      /* Sandybridge's gather4 message is broken for integer formats.
337
       * To work around this, we pretend the surface is UNORM for
338
       * 8 or 16-bit formats, and emit shader instructions to recover
339
       * the real INT/UINT value.  For 32-bit formats, we pretend
340
       * the surface is FLOAT, and simply reinterpret the resulting
341
       * bits.
342
       */
343
      switch (tex_format) {
344
      case BRW_SURFACEFORMAT_R8_SINT:
345
      case BRW_SURFACEFORMAT_R8_UINT:
346
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
347
         break;
348
 
349
      case BRW_SURFACEFORMAT_R16_SINT:
350
      case BRW_SURFACEFORMAT_R16_UINT:
351
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
352
         break;
353
 
354
      case BRW_SURFACEFORMAT_R32_SINT:
355
      case BRW_SURFACEFORMAT_R32_UINT:
356
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
357
         break;
358
 
359
      default:
360
         break;
361
      }
362
   }
363
 
364
   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
365
	      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
366
	      BRW_SURFACE_CUBEFACE_ENABLES |
367
	      tex_format << BRW_SURFACE_FORMAT_SHIFT);
368
 
369
   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
370
 
371
   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
372
	      (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
373
	      (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
374
 
375
   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
376
	      (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
377
	      (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
378
 
379
   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
380
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
381
 
382
   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
383
 
384
   /* Emit relocation to surface contents */
385
   drm_intel_bo_emit_reloc(brw->batch.bo,
386
                           *surf_offset + 4,
387
                           mt->bo,
388
                           surf[1] - mt->bo->offset64,
389
                           I915_GEM_DOMAIN_SAMPLER, 0);
390
}
391
 
392
/**
393
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
394
 * read from this buffer with Data Port Read instructions/messages.
395
 */
396
void
397
brw_create_constant_surface(struct brw_context *brw,
398
			    drm_intel_bo *bo,
399
			    uint32_t offset,
400
			    uint32_t size,
401
			    uint32_t *out_offset,
402
                            bool dword_pitch)
403
{
404
   uint32_t stride = dword_pitch ? 4 : 16;
405
   uint32_t elements = ALIGN(size, stride) / stride;
406
 
407
   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
408
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
409
                                       elements, stride, false);
410
}
411
 
412
/**
413
 * Set up a binding table entry for use by stream output logic (transform
414
 * feedback).
415
 *
416
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
417
 */
418
void
419
brw_update_sol_surface(struct brw_context *brw,
420
                       struct gl_buffer_object *buffer_obj,
421
                       uint32_t *out_offset, unsigned num_vector_components,
422
                       unsigned stride_dwords, unsigned offset_dwords)
423
{
424
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
425
   uint32_t offset_bytes = 4 * offset_dwords;
426
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
427
                                             offset_bytes,
428
                                             buffer_obj->Size - offset_bytes);
429
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
430
                                    out_offset);
431
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
432
   size_t size_dwords = buffer_obj->Size / 4;
433
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;
434
 
435
   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
436
    * too big to map using a single binding table entry?
437
    */
438
   assert((size_dwords - offset_dwords) / stride_dwords
439
          <= BRW_MAX_NUM_BUFFER_ENTRIES);
440
 
441
   if (size_dwords > offset_dwords + num_vector_components) {
442
      /* There is room for at least 1 transform feedback output in the buffer.
443
       * Compute the number of additional transform feedback outputs the
444
       * buffer has room for.
445
       */
446
      buffer_size_minus_1 =
447
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
448
   } else {
449
      /* There isn't even room for a single transform feedback output in the
450
       * buffer.  We can't configure the binding table entry to prevent output
451
       * entirely; we'll have to rely on the geometry shader to detect
452
       * overflow.  But to minimize the damage in case of a bug, set up the
453
       * binding table entry to just allow a single output.
454
       */
455
      buffer_size_minus_1 = 0;
456
   }
457
   width = buffer_size_minus_1 & 0x7f;
458
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
459
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
460
 
461
   switch (num_vector_components) {
462
   case 1:
463
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
464
      break;
465
   case 2:
466
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
467
      break;
468
   case 3:
469
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
470
      break;
471
   case 4:
472
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
473
      break;
474
   default:
475
      unreachable("Invalid vector size for transform feedback output");
476
   }
477
 
478
   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
479
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
480
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
481
      BRW_SURFACE_RC_READ_WRITE;
482
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
483
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
484
	      height << BRW_SURFACE_HEIGHT_SHIFT);
485
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
486
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
487
   surf[4] = 0;
488
   surf[5] = 0;
489
 
490
   /* Emit relocation to surface contents. */
491
   drm_intel_bo_emit_reloc(brw->batch.bo,
492
			   *out_offset + 4,
493
			   bo, offset_bytes,
494
			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
495
}
496
 
497
/* Creates a new WM constant buffer reflecting the current fragment program's
498
 * constants, if needed by the fragment program.
499
 *
500
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
501
 * state atom.
502
 */
503
static void
504
brw_upload_wm_pull_constants(struct brw_context *brw)
505
{
506
   struct brw_stage_state *stage_state = &brw->wm.base;
507
   /* BRW_NEW_FRAGMENT_PROGRAM */
508
   struct brw_fragment_program *fp =
509
      (struct brw_fragment_program *) brw->fragment_program;
510
   /* BRW_NEW_FS_PROG_DATA */
511
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
512
 
513
   /* _NEW_PROGRAM_CONSTANTS */
514
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
515
                             stage_state, prog_data, true);
516
}
517
 
518
const struct brw_tracked_state brw_wm_pull_constants = {
519
   .dirty = {
520
      .mesa = _NEW_PROGRAM_CONSTANTS,
521
      .brw = BRW_NEW_BATCH |
522
             BRW_NEW_FRAGMENT_PROGRAM |
523
             BRW_NEW_FS_PROG_DATA,
524
   },
525
   .emit = brw_upload_wm_pull_constants,
526
};
527
 
528
/**
529
 * Creates a null renderbuffer surface.
530
 *
531
 * This is used when the shader doesn't write to any color output.  An FB
532
 * write to target 0 will still be emitted, because that's how the thread is
533
 * terminated (and computed depth is returned), so we need to have the
534
 * hardware discard the target 0 color output..
535
 */
536
static void
537
brw_emit_null_surface_state(struct brw_context *brw,
538
                            unsigned width,
539
                            unsigned height,
540
                            unsigned samples,
541
                            uint32_t *out_offset)
542
{
543
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
544
    * Notes):
545
    *
546
    *     A null surface will be used in instances where an actual surface is
547
    *     not bound. When a write message is generated to a null surface, no
548
    *     actual surface is written to. When a read message (including any
549
    *     sampling engine message) is generated to a null surface, the result
550
    *     is all zeros. Note that a null surface type is allowed to be used
551
    *     with all messages, even if it is not specificially indicated as
552
    *     supported. All of the remaining fields in surface state are ignored
553
    *     for null surfaces, with the following exceptions:
554
    *
555
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
556
    *       depth buffer’s corresponding state for all render target surfaces,
557
    *       including null.
558
    *
559
    *     - Surface Format must be R8G8B8A8_UNORM.
560
    */
561
   unsigned surface_type = BRW_SURFACE_NULL;
562
   drm_intel_bo *bo = NULL;
563
   unsigned pitch_minus_1 = 0;
564
   uint32_t multisampling_state = 0;
565
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
566
                                    out_offset);
567
 
568
   if (samples > 1) {
569
      /* On Gen6, null render targets seem to cause GPU hangs when
570
       * multisampling.  So work around this problem by rendering into dummy
571
       * color buffer.
572
       *
573
       * To decrease the amount of memory needed by the workaround buffer, we
574
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
575
       * the amount of memory needed for the workaround buffer is
576
       * (width_in_tiles + height_in_tiles - 1) tiles.
577
       *
578
       * Note that since the workaround buffer will be interpreted by the
579
       * hardware as an interleaved multisampled buffer, we need to compute
580
       * width_in_tiles and height_in_tiles by dividing the width and height
581
       * by 16 rather than the normal Y-tile size of 32.
582
       */
583
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
584
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
585
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
586
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
587
                         size_needed);
588
      bo = brw->wm.multisampled_null_render_target_bo;
589
      surface_type = BRW_SURFACE_2D;
590
      pitch_minus_1 = 127;
591
      multisampling_state = brw_get_surface_num_multisamples(samples);
592
   }
593
 
594
   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
595
	      BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
596
   if (brw->gen < 6) {
597
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
598
		  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
599
		  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
600
		  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
601
   }
602
   surf[1] = bo ? bo->offset64 : 0;
603
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
604
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
605
 
606
   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
607
    * Notes):
608
    *
609
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
610
    */
611
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
612
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
613
   surf[4] = multisampling_state;
614
   surf[5] = 0;
615
 
616
   if (bo) {
617
      drm_intel_bo_emit_reloc(brw->batch.bo,
618
                              *out_offset + 4,
619
                              bo, 0,
620
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
621
   }
622
}
623
 
624
/**
625
 * Sets up a surface state structure to point at the given region.
626
 * While it is only used for the front/back buffer currently, it should be
627
 * usable for further buffers when doing ARB_draw_buffer support.
628
 */
629
static uint32_t
630
brw_update_renderbuffer_surface(struct brw_context *brw,
631
                                struct gl_renderbuffer *rb,
632
                                bool layered, unsigned unit,
633
                                uint32_t surf_index)
634
{
635
   struct gl_context *ctx = &brw->ctx;
636
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
637
   struct intel_mipmap_tree *mt = irb->mt;
638
   uint32_t *surf;
639
   uint32_t tile_x, tile_y;
640
   uint32_t format = 0;
641
   uint32_t offset;
642
   /* _NEW_BUFFERS */
643
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
644
   /* BRW_NEW_FS_PROG_DATA */
645
 
646
   assert(!layered);
647
 
648
   if (rb->TexImage && !brw->has_surface_tile_offset) {
649
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
650
 
651
      if (tile_x != 0 || tile_y != 0) {
652
	 /* Original gen4 hardware couldn't draw to a non-tile-aligned
653
	  * destination in a miptree unless you actually setup your renderbuffer
654
	  * as a miptree and used the fragile lod/array_index/etc. controls to
655
	  * select the image.  So, instead, we just make a new single-level
656
	  * miptree and render into that.
657
	  */
658
	 intel_renderbuffer_move_to_temp(brw, irb, false);
659
	 mt = irb->mt;
660
      }
661
   }
662
 
663
   intel_miptree_used_for_rendering(irb->mt);
664
 
665
   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
666
 
667
   format = brw->render_target_format[rb_format];
668
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
669
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
670
                    __func__, _mesa_get_format_name(rb_format));
671
   }
672
 
673
   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
674
	      format << BRW_SURFACE_FORMAT_SHIFT);
675
 
676
   /* reloc */
677
   assert(mt->offset % mt->cpp == 0);
678
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
679
	      mt->bo->offset64 + mt->offset);
680
 
681
   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
682
	      (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
683
 
684
   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
685
	      (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
686
 
687
   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
688
 
689
   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
690
   /* Note that the low bits of these fields are missing, so
691
    * there's the possibility of getting in trouble.
692
    */
693
   assert(tile_x % 4 == 0);
694
   assert(tile_y % 2 == 0);
695
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
696
	      (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
697
	      (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
698
 
699
   if (brw->gen < 6) {
700
      /* _NEW_COLOR */
701
      if (!ctx->Color.ColorLogicOpEnabled &&
702
	  (ctx->Color.BlendEnabled & (1 << unit)))
703
	 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
704
 
705
      if (!ctx->Color.ColorMask[unit][0])
706
	 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
707
      if (!ctx->Color.ColorMask[unit][1])
708
	 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
709
      if (!ctx->Color.ColorMask[unit][2])
710
	 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
711
 
712
      /* As mentioned above, disable writes to the alpha component when the
713
       * renderbuffer is XRGB.
714
       */
715
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
716
	  !ctx->Color.ColorMask[unit][3]) {
717
	 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
718
      }
719
   }
720
 
721
   drm_intel_bo_emit_reloc(brw->batch.bo,
722
                           offset + 4,
723
                           mt->bo,
724
                           surf[1] - mt->bo->offset64,
725
                           I915_GEM_DOMAIN_RENDER,
726
                           I915_GEM_DOMAIN_RENDER);
727
 
728
   return offset;
729
}
730
 
731
/**
732
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
733
 */
734
void
735
brw_update_renderbuffer_surfaces(struct brw_context *brw,
736
                                 const struct gl_framebuffer *fb,
737
                                 uint32_t render_target_start,
738
                                 uint32_t *surf_offset)
739
{
740
   GLuint i;
741
 
742
   /* Update surfaces for drawing buffers */
743
   if (fb->_NumColorDrawBuffers >= 1) {
744
      for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
745
         const uint32_t surf_index = render_target_start + i;
746
 
747
	 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
748
            surf_offset[surf_index] =
749
               brw->vtbl.update_renderbuffer_surface(
750
                  brw, fb->_ColorDrawBuffers[i],
751
                  fb->MaxNumLayers > 0, i, surf_index);
752
	 } else {
753
            brw->vtbl.emit_null_surface_state(
754
               brw, fb->Width, fb->Height, fb->Visual.samples,
755
               &surf_offset[surf_index]);
756
	 }
757
      }
758
   } else {
759
      const uint32_t surf_index = render_target_start;
760
      brw->vtbl.emit_null_surface_state(
761
         brw, fb->Width, fb->Height, fb->Visual.samples,
762
         &surf_offset[surf_index]);
763
   }
764
}
765
 
766
static void
767
update_renderbuffer_surfaces(struct brw_context *brw)
768
{
769
   const struct gl_context *ctx = &brw->ctx;
770
 
771
   /* _NEW_BUFFERS | _NEW_COLOR */
772
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
773
   brw_update_renderbuffer_surfaces(
774
      brw, fb,
775
      brw->wm.prog_data->binding_table.render_target_start,
776
      brw->wm.base.surf_offset);
777
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
778
}
779
 
780
const struct brw_tracked_state brw_renderbuffer_surfaces = {
781
   .dirty = {
782
      .mesa = _NEW_BUFFERS |
783
              _NEW_COLOR,
784
      .brw = BRW_NEW_BATCH |
785
             BRW_NEW_FS_PROG_DATA,
786
   },
787
   .emit = update_renderbuffer_surfaces,
788
};
789
 
790
const struct brw_tracked_state gen6_renderbuffer_surfaces = {
791
   .dirty = {
792
      .mesa = _NEW_BUFFERS,
793
      .brw = BRW_NEW_BATCH,
794
   },
795
   .emit = update_renderbuffer_surfaces,
796
};
797
 
798
 
799
static void
800
update_stage_texture_surfaces(struct brw_context *brw,
801
                              const struct gl_program *prog,
802
                              struct brw_stage_state *stage_state,
803
                              bool for_gather)
804
{
805
   if (!prog)
806
      return;
807
 
808
   struct gl_context *ctx = &brw->ctx;
809
 
810
   uint32_t *surf_offset = stage_state->surf_offset;
811
 
812
   /* BRW_NEW_*_PROG_DATA */
813
   if (for_gather)
814
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
815
   else
816
      surf_offset += stage_state->prog_data->binding_table.texture_start;
817
 
818
   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
819
   for (unsigned s = 0; s < num_samplers; s++) {
820
      surf_offset[s] = 0;
821
 
822
      if (prog->SamplersUsed & (1 << s)) {
823
         const unsigned unit = prog->SamplerUnits[s];
824
 
825
         /* _NEW_TEXTURE */
826
         if (ctx->Texture.Unit[unit]._Current) {
827
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
828
         }
829
      }
830
   }
831
}
832
 
833
 
834
/**
835
 * Construct SURFACE_STATE objects for enabled textures.
836
 */
837
static void
838
brw_update_texture_surfaces(struct brw_context *brw)
839
{
840
   /* BRW_NEW_VERTEX_PROGRAM */
841
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
842
 
843
   /* BRW_NEW_GEOMETRY_PROGRAM */
844
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;
845
 
846
   /* BRW_NEW_FRAGMENT_PROGRAM */
847
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;
848
 
849
   /* _NEW_TEXTURE */
850
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
851
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
852
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
853
 
854
   /* emit alternate set of surface state for gather. this
855
    * allows the surface format to be overriden for only the
856
    * gather4 messages. */
857
   if (brw->gen < 8) {
858
      if (vs && vs->UsesGather)
859
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
860
      if (gs && gs->UsesGather)
861
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
862
      if (fs && fs->UsesGather)
863
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
864
   }
865
 
866
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
867
}
868
 
869
const struct brw_tracked_state brw_texture_surfaces = {
870
   .dirty = {
871
      .mesa = _NEW_TEXTURE,
872
      .brw = BRW_NEW_BATCH |
873
             BRW_NEW_FRAGMENT_PROGRAM |
874
             BRW_NEW_FS_PROG_DATA |
875
             BRW_NEW_GEOMETRY_PROGRAM |
876
             BRW_NEW_GS_PROG_DATA |
877
             BRW_NEW_TEXTURE_BUFFER |
878
             BRW_NEW_VERTEX_PROGRAM |
879
             BRW_NEW_VS_PROG_DATA,
880
   },
881
   .emit = brw_update_texture_surfaces,
882
};
883
 
884
void
885
brw_upload_ubo_surfaces(struct brw_context *brw,
886
			struct gl_shader *shader,
887
                        struct brw_stage_state *stage_state,
888
                        struct brw_stage_prog_data *prog_data,
889
                        bool dword_pitch)
890
{
891
   struct gl_context *ctx = &brw->ctx;
892
 
893
   if (!shader)
894
      return;
895
 
896
   uint32_t *surf_offsets =
897
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];
898
 
899
   for (int i = 0; i < shader->NumUniformBlocks; i++) {
900
      struct gl_uniform_buffer_binding *binding;
901
      struct intel_buffer_object *intel_bo;
902
 
903
      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
904
      intel_bo = intel_buffer_object(binding->BufferObject);
905
      drm_intel_bo *bo =
906
         intel_bufferobj_buffer(brw, intel_bo,
907
                                binding->Offset,
908
                                binding->BufferObject->Size - binding->Offset);
909
 
910
      /* Because behavior for referencing outside of the binding's size in the
911
       * glBindBufferRange case is undefined, we can just bind the whole buffer
912
       * glBindBufferBase wants and be a correct implementation.
913
       */
914
      brw_create_constant_surface(brw, bo, binding->Offset,
915
                                  bo->size - binding->Offset,
916
                                  &surf_offsets[i],
917
                                  dword_pitch);
918
   }
919
 
920
   if (shader->NumUniformBlocks)
921
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
922
}
923
 
924
static void
925
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
926
{
927
   struct gl_context *ctx = &brw->ctx;
928
   /* _NEW_PROGRAM */
929
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
930
 
931
   if (!prog)
932
      return;
933
 
934
   /* BRW_NEW_FS_PROG_DATA */
935
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
936
                           &brw->wm.base, &brw->wm.prog_data->base, true);
937
}
938
 
939
const struct brw_tracked_state brw_wm_ubo_surfaces = {
940
   .dirty = {
941
      .mesa = _NEW_PROGRAM,
942
      .brw = BRW_NEW_BATCH |
943
             BRW_NEW_FS_PROG_DATA |
944
             BRW_NEW_UNIFORM_BUFFER,
945
   },
946
   .emit = brw_upload_wm_ubo_surfaces,
947
};
948
 
949
void
950
brw_upload_abo_surfaces(struct brw_context *brw,
951
			struct gl_shader_program *prog,
952
                        struct brw_stage_state *stage_state,
953
                        struct brw_stage_prog_data *prog_data)
954
{
955
   struct gl_context *ctx = &brw->ctx;
956
   uint32_t *surf_offsets =
957
      &stage_state->surf_offset[prog_data->binding_table.abo_start];
958
 
959
   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
960
      struct gl_atomic_buffer_binding *binding =
961
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
962
      struct intel_buffer_object *intel_bo =
963
         intel_buffer_object(binding->BufferObject);
964
      drm_intel_bo *bo = intel_bufferobj_buffer(
965
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
966
 
967
      brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
968
                                          binding->Offset, BRW_SURFACEFORMAT_RAW,
969
                                          bo->size - binding->Offset, 1, true);
970
   }
971
 
972
   if (prog->NumAtomicBuffers)
973
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
974
}
975
 
976
static void
977
brw_upload_wm_abo_surfaces(struct brw_context *brw)
978
{
979
   struct gl_context *ctx = &brw->ctx;
980
   /* _NEW_PROGRAM */
981
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
982
 
983
   if (prog) {
984
      /* BRW_NEW_FS_PROG_DATA */
985
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
986
                              &brw->wm.prog_data->base);
987
   }
988
}
989
 
990
const struct brw_tracked_state brw_wm_abo_surfaces = {
991
   .dirty = {
992
      .mesa = _NEW_PROGRAM,
993
      .brw = BRW_NEW_ATOMIC_BUFFER |
994
             BRW_NEW_BATCH |
995
             BRW_NEW_FS_PROG_DATA,
996
   },
997
   .emit = brw_upload_wm_abo_surfaces,
998
};
999
 
1000
static void
1001
brw_upload_cs_abo_surfaces(struct brw_context *brw)
1002
{
1003
   struct gl_context *ctx = &brw->ctx;
1004
   /* _NEW_PROGRAM */
1005
   struct gl_shader_program *prog =
1006
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1007
 
1008
   if (prog) {
1009
      /* BRW_NEW_CS_PROG_DATA */
1010
      brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
1011
                              &brw->cs.prog_data->base);
1012
   }
1013
}
1014
 
1015
const struct brw_tracked_state brw_cs_abo_surfaces = {
1016
   .dirty = {
1017
      .mesa = _NEW_PROGRAM,
1018
      .brw = BRW_NEW_ATOMIC_BUFFER |
1019
             BRW_NEW_BATCH |
1020
             BRW_NEW_CS_PROG_DATA,
1021
   },
1022
   .emit = brw_upload_cs_abo_surfaces,
1023
};
1024
 
1025
void
1026
gen4_init_vtable_surface_functions(struct brw_context *brw)
1027
{
1028
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
1029
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1030
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1031
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1032
}