/**************************************************************************
 *
 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
#include <sys/errno.h>   /* for ENOSPC, checked when flushing the batch */

#include "main/glheader.h"
#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"

#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#define FILE_DEBUG_FLAG DEBUG_PRIMS

static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
   _3DPRIM_POINTLIST,
   _3DPRIM_LINELIST,
   _3DPRIM_LINELOOP,
   _3DPRIM_LINESTRIP,
   _3DPRIM_TRILIST,
   _3DPRIM_TRISTRIP,
   _3DPRIM_TRIFAN,
   _3DPRIM_QUADLIST,
   _3DPRIM_QUADSTRIP,
   _3DPRIM_POLYGON
};
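/* Note: prim_to_hw_prim (above) is indexed directly by the GL primitive
 * mode; GL_POINTS through GL_POLYGON are the consecutive enum values 0..9,
 * so prim->mode is a valid index.
 */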


static const GLenum reduced_prim[GL_POLYGON+1] = {
   GL_POINTS,
   GL_LINES,
   GL_LINES,
   GL_LINES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES
};
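/* reduced_prim collapses each primitive to the class the rasterizer sees
 * (points, lines, or triangles); state that only depends on that class is
 * keyed off BRW_NEW_REDUCED_PRIMITIVE rather than BRW_NEW_PRIMITIVE.
 */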


/* When the primitive changes, set a state bit and re-validate.  This is
 * not the nicest approach; we would rather make all the programs immune
 * to the active primitive (i.e. cope with all possibilities), but that
 * may not be realistic.
 */
static void brw_set_prim(struct brw_context *brw,
                         const struct _mesa_prim *prim)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t hw_prim = prim_to_hw_prim[prim->mode];

   DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));

   /* Slight optimization to avoid the GS program when not needed:
    */
   if (prim->mode == GL_QUAD_STRIP &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL)
      hw_prim = _3DPRIM_TRISTRIP;

   if (prim->mode == GL_QUADS && prim->count == 4 &&
       ctx->Light.ShadeModel != GL_FLAT &&
       ctx->Polygon.FrontMode == GL_FILL &&
       ctx->Polygon.BackMode == GL_FILL) {
      hw_prim = _3DPRIM_TRIFAN;
   }
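
   /* These rewrites are only safe when shading is smooth and both faces
    * are filled (checked above): with GL_FLAT the provoking vertex would
    * differ between quads and their tristrip/trifan decomposition, and
    * line/point polygon modes need the GS to decompose quads correctly.
    */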
   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;

      if (reduced_prim[prim->mode] != brw->reduced_primitive) {
	 brw->reduced_primitive = reduced_prim[prim->mode];
	 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
      }
   }
}

static void gen6_set_prim(struct brw_context *brw,
                          const struct _mesa_prim *prim)
{
   uint32_t hw_prim;

   DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));

   hw_prim = prim_to_hw_prim[prim->mode];

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
      brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
   }
}


/**
 * The hardware is capable of removing dangling vertices on its own; however,
 * prior to Gen6, we sometimes convert quads into trifans (and quad strips
 * into tristrips), since pre-Gen6 hardware requires a GS to render quads.
 * This function manually trims dangling vertices from a draw call involving
 * quads so that those dangling vertices won't get drawn when we convert to
 * trifans/tristrips.
 */
static GLuint trim(GLenum prim, GLuint length)
{
   if (prim == GL_QUAD_STRIP)
      return length > 3 ? (length - length % 2) : 0;
   else if (prim == GL_QUADS)
      return length - length % 4;
   else
      return length;
}
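
/* For example: GL_QUADS with count == 7 trims to 7 - 7 % 4 = 4 (one
 * complete quad; the three dangling vertices are dropped).  GL_QUAD_STRIP
 * with count == 5 trims to 4, and a strip shorter than 4 vertices to 0.
 */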


static void brw_emit_prim(struct brw_context *brw,
			  const struct _mesa_prim *prim,
			  uint32_t hw_prim)
{
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (brw->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the gpu besides the draw code.
    */
   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }
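
   /* Gen4-6 3DPRIMITIVE is 6 dwords, emitted below: DW0 packs the opcode
    * with the topology type and the vertex access mode, DW1 is the vertex
    * count per instance, DW2 the start vertex, DW3 the instance count,
    * DW4 the start instance, and DW5 the base vertex.
    */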
   BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
	     hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
	     vertex_access_type);
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   brw->batch.need_workaround_flush = true;

   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }
}

static void gen7_emit_prim(struct brw_context *brw,
			   const struct _mesa_prim *prim,
			   uint32_t hw_prim)
{
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the gpu besides the draw code.
    */
   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }
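
   /* On Gen7 the packet grew to 7 dwords: the topology type moved out of
    * DW0 into DW1 (alongside the access mode), and the remaining fields
    * follow in the same order as on earlier gens.
    */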
   BEGIN_BATCH(7);
   OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
   OUT_BATCH(hw_prim | vertex_access_type);
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }
}


static void brw_merge_inputs( struct brw_context *brw,
		       const struct gl_client_array *arrays[])
{
   GLuint i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      brw->vb.inputs[i].buffer = -1;
      brw->vb.inputs[i].glarray = arrays[i];
      brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
   }
}
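
/* brw_merge_inputs drops the references to the previous draw's vertex
 * buffers and latches the current gl_client_array pointers; buffer == -1
 * marks each input as not yet assigned, to be filled in when vertex data
 * is uploaded during state validation.
 */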

/**
 * \brief Resolve buffers before drawing.
 *
 * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
 * enabled depth texture.
 *
 * (In the future, this will also perform MSAA resolves).
 */
static void
brw_predraw_resolve_buffers(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *depth_irb;
   struct intel_texture_object *tex_obj;

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer of each enabled depth texture, and color buffer of
    * each fast-clear-enabled color texture.
    */
   for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
      if (!ctx->Texture.Unit[i]._ReallyEnabled)
	 continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
	 continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 */
static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *front_irb = NULL;
   struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);

   if (brw->is_front_buffer_rendering)
      front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);

   if (front_irb)
      intel_renderbuffer_set_needs_downsample(front_irb);
   if (back_irb)
      intel_renderbuffer_set_needs_downsample(back_irb);
   if (depth_irb && ctx->Depth.Mask)
      intel_renderbuffer_set_needs_depth_resolve(depth_irb);
}

/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prim,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index )
{
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_state() because it
    * may flush the batchbuffer for a blit, affecting the state flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
				  (sizeof(struct brw_sampler_state) +
				   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prim->num_instances) {
         brw->num_instances = prim->num_instances;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
      }
      if (brw->basevertex != prim->basevertex) {
         brw->basevertex = prim->basevertex;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
      }
      if (brw->gen < 6)
	 brw_set_prim(brw, &prim[i]);
      else
	 gen6_set_prim(brw, &prim[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * the only state updated inside the loop but outside this block is in
       * *_set_prim or intel_batchbuffer_flush(), both of which only affect
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
	 brw->no_batch_wrap = true;
	 brw_upload_state(brw);
      }

      if (brw->gen >= 7)
	 gen7_emit_prim(brw, &prim[i], brw->primitive);
      else
	 brw_emit_prim(brw, &prim[i], brw->primitive);

      brw->no_batch_wrap = false;
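
      /* If the batch plus its referenced buffers no longer fits in the
       * aperture, roll back to the state saved above, flush what we had,
       * and retry this primitive once with a fresh batch.  If even a lone
       * primitive overflows the aperture, flush anyway and warn once.
       */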
      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(brw);
	    intel_batchbuffer_flush(brw);
	    fail_next = true;
	    goto retry;
	 } else {
	    if (intel_batchbuffer_flush(brw) == -ENOSPC) {
	       static bool warned = false;

	       if (!warned) {
		  fprintf(stderr, "i965: Single primitive emit exceeded "
			  "available aperture space\n");
		  warned = true;
	       }

	       retval = false;
	    }
	 }
      }
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}

void brw_draw_prims( struct gl_context *ctx,
		     const struct _mesa_prim *prim,
		     GLuint nr_prims,
		     const struct _mesa_index_buffer *ib,
		     GLboolean index_bounds_valid,
		     GLuint min_index,
		     GLuint max_index,
		     struct gl_transform_feedback_object *tfb_vertcount )
{
   struct brw_context *brw = brw_context(ctx);
   const struct gl_client_array **arrays = ctx->Array._DrawArrays;

   if (!_mesa_check_conditional_render(ctx))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prim, nr_prims, ib)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!vbo_all_varyings_in_vbos(arrays) && !index_bounds_valid)
      vbo_get_minmax_indices(ctx, prim, ib, &min_index, &max_index, nr_prims);

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_lookup_enum_by_nr(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
      return;
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}

void brw_draw_init( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   struct vbo_context *vbo = vbo_context(ctx);
   int i;

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;

   for (i = 0; i < VERT_ATTRIB_MAX; i++)
      brw->vb.inputs[i].buffer = -1;
   brw->vb.nr_buffers = 0;
   brw->vb.nr_enabled = 0;
}

void brw_draw_destroy( struct brw_context *brw )
{
   int i;

   for (i = 0; i < brw->vb.nr_buffers; i++) {
      drm_intel_bo_unreference(brw->vb.buffers[i].bo);
      brw->vb.buffers[i].bo = NULL;
   }
   brw->vb.nr_buffers = 0;

   for (i = 0; i < brw->vb.nr_enabled; i++) {
      brw->vb.enabled[i]->buffer = -1;
   }
   brw->vb.nr_enabled = 0;

   drm_intel_bo_unreference(brw->ib.bo);
   brw->ib.bo = NULL;
}