/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"
#include "brw_context.h"

static void
intel_batchbuffer_reset(struct brw_context *brw);

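/* One entry in the small cache of previously emitted state packets, used by
 * intel_batchbuffer_cached_advance() to avoid re-emitting identical packets.
 * header is the DWord offset of the packet within the batch map; size is the
 * packet length in bytes.
 */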
struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

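/* Free every entry in the cached-item list and leave it empty.  Called
 * whenever the batch contents become unknown (reset, rollback, free).
 */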
static void
clear_cache(struct brw_context *brw)
{
   struct cached_batch_item *item = brw->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   brw->batch.cached_items = NULL;
}

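/* One-time batchbuffer setup: allocates the first batch BO, the gen6+
 * PIPE_CONTROL workaround BO, and (on non-LLC platforms) a CPU shadow
 * copy of the batch that is uploaded at flush time.
 */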
void
intel_batchbuffer_init(struct brw_context *brw)
{
   intel_batchbuffer_reset(brw);

   if (brw->gen >= 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      brw->batch.workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
                                                    "pipe_control workaround",
                                                    4096, 4096);
   }

   if (!brw->has_llc) {
      brw->batch.cpu_map = malloc(BATCH_SZ);
      brw->batch.map = brw->batch.cpu_map;
   }
}

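/* Start a fresh batch: drop the previous "last" BO, remember the one we just
 * finished, allocate a new batch BO (mapped directly on LLC platforms), and
 * reset the space-accounting and SOL-reset bookkeeping.
 */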
static void
intel_batchbuffer_reset(struct brw_context *brw)
{
   if (brw->batch.last_bo != NULL) {
      drm_intel_bo_unreference(brw->batch.last_bo);
      brw->batch.last_bo = NULL;
   }
   brw->batch.last_bo = brw->batch.bo;

   clear_cache(brw);

   brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
                                      BATCH_SZ, 4096);
   if (brw->has_llc) {
      drm_intel_bo_map(brw->batch.bo, true);
      brw->batch.map = brw->batch.bo->virtual;
   }

   brw->batch.reserved_space = BATCH_RESERVED;
   brw->batch.state_batch_offset = brw->batch.bo->size;
   brw->batch.used = 0;
   brw->batch.needs_sol_reset = false;
}

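/* Snapshot the current batch position and relocation count so the caller can
 * roll back with intel_batchbuffer_reset_to_saved() if the commands emitted
 * since the snapshot have to be abandoned.
 */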
void
intel_batchbuffer_save_state(struct brw_context *brw)
{
   brw->batch.saved.used = brw->batch.used;
   brw->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
}

void
intel_batchbuffer_reset_to_saved(struct brw_context *brw)
{
   drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);

   brw->batch.used = brw->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part of the
    * batchbuffer.  Assume that the caller resets any other state necessary.
    */
   clear_cache(brw);
}

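/* Release everything owned by the batchbuffer: the CPU shadow map, the
 * current and previous batch BOs, the workaround BO, and the item cache.
 */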
void
intel_batchbuffer_free(struct brw_context *brw)
{
   free(brw->batch.cpu_map);
   drm_intel_bo_unreference(brw->batch.last_bo);
   drm_intel_bo_unreference(brw->batch.bo);
   drm_intel_bo_unreference(brw->batch.workaround_bo);
   clear_cache(brw);
}

#if 0
static void
do_batch_dump(struct brw_context *brw)
{
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &brw->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(brw->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset,
                                         batch->used);
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(ret));

      drm_intel_decode_set_batch_pointer(decode,
                                         batch->map,
                                         batch->bo->offset,
                                         batch->used);
   }

   drm_intel_decode(decode);

   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);

      brw_debug_batch(brw);
   }
}
#endif

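/* Hand the finished batch to the kernel.  On LLC platforms the directly
 * mapped batch BO is simply unmapped; otherwise the CPU shadow copy (and any
 * state stored at the top of the buffer) is uploaded with
 * drm_intel_bo_subdata().  The batch is then executed on the render or blit
 * ring, using the hardware context when one is available.
 */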
/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct brw_context *brw)
{
   struct intel_batchbuffer *batch = &brw->batch;
   int ret = 0;

   if (brw->has_llc) {
      drm_intel_bo_unmap(batch->bo);
   } else {
      ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
         ret = drm_intel_bo_subdata(batch->bo,
                                    batch->state_batch_offset,
                                    batch->bo->size - batch->state_batch_offset,
                                    (char *)batch->map + batch->state_batch_offset);
      }
   }

   if (!brw->intelScreen->no_hw) {
      int flags;

      if (brw->gen < 6 || !batch->is_blit) {
         flags = I915_EXEC_RENDER;
      } else {
         flags = I915_EXEC_BLT;
      }

      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0) {
         if (unlikely(INTEL_DEBUG & DEBUG_AUB))
            brw_annotate_aub(brw);
         if (brw->hw_ctx == NULL || batch->is_blit) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
                                        flags);
         } else {
            ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
                                                4 * batch->used, flags);
         }
      }
   }

//   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
//      do_batch_dump(brw);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   brw->vtbl.new_batch(brw);

   return ret;
}

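/* Close out and submit the current batch: terminate it with
 * MI_BATCH_BUFFER_END (padding to an even DWord count with MI_NOOP), flush
 * pending uploads, submit via do_flush_locked(), and then reset so that
 * subsequent commands start a new batch.  Returns the result of
 * do_flush_locked(), or 0 immediately if the batch was empty.
 */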
int
_intel_batchbuffer_flush(struct brw_context *brw,
                         const char *file, int line)
{
   int ret;

   if (brw->batch.used == 0)
      return 0;

   if (brw->first_post_swapbuffers_batch == NULL) {
      brw->first_post_swapbuffers_batch = brw->batch.bo;
      drm_intel_bo_reference(brw->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*brw->batch.used);

   brw->batch.reserved_space = 0;

   if (brw->vtbl.finish_batch)
      brw->vtbl.finish_batch(brw);

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
   if (brw->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(brw, MI_NOOP);
   }

   intel_upload_finish(brw);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!brw->no_batch_wrap);

   ret = do_flush_locked(brw);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(brw->batch.bo);
   }

   /* Reset the buffer:
    */
   intel_batchbuffer_reset(brw);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct brw_context *brw,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would be, in case
    * the buffer doesn't move and we can short-circuit the relocation processing
    * in the kernel
    */
   intel_batchbuffer_emit_dword(brw, buffer->offset + delta);

   return true;
}

bool
intel_batchbuffer_emit_reloc_fenced(struct brw_context *brw,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(brw->batch.bo, 4*brw->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel
    */
   intel_batchbuffer_emit_dword(brw, buffer->offset + delta);

   return true;
}

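/* Copy a block of pre-built, DWord-aligned commands into the batch,
 * requesting space (and switching rings if necessary) first.
 *
 * Illustrative sketch only -- emitting two NOOPs through this path:
 *
 *    uint32_t noops[2] = { MI_NOOP, MI_NOOP };
 *    intel_batchbuffer_data(brw, noops, sizeof(noops), false);
 */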
void
intel_batchbuffer_data(struct brw_context *brw,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(brw, bytes, is_blit);
   __memcpy(brw->batch.map + brw->batch.used, data, bytes);
   brw->batch.used += bytes >> 2;
}

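/* Close out the packet currently being built (batch.emit .. batch.used)
 * while de-duplicating it: if a packet with the same opcode, size and
 * contents was emitted earlier in this batch, rewind batch.used so the
 * duplicate is dropped; otherwise remember it in the cached-item list
 * (hits are moved to the front of the list).
 */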
void
intel_batchbuffer_cached_advance(struct brw_context *brw)
{
   struct cached_batch_item **prev = &brw->batch.cached_items, *item;
   uint32_t sz = (brw->batch.used - brw->batch.emit) * sizeof(uint32_t);
   uint32_t *start = brw->batch.map + brw->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = brw->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &brw->batch.cached_items) {
               *prev = item->next;
               item->next = brw->batch.cached_items;
               brw->batch.cached_items = item;
            }
            brw->batch.used = brw->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = brw->batch.cached_items;
   brw->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = brw->batch.emit;
}

/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct brw_context *brw)
{
   assert(brw->gen >= 6 && brw->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
 *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
 *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
 *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
 *  to be sent before any combination of VS associated 3DSTATE."
 */
void
gen7_emit_vs_workaround_flush(struct brw_context *brw)
{
   assert(brw->gen == 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(brw->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
{
   if (!brw->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(brw->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   brw->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
{
   if (brw->gen >= 6) {
      if (brw->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (brw->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             * Flush Enable =1, a PIPE_CONTROL with any non-zero
             * post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(brw);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE |
                   PIPE_CONTROL_CS_STALL);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   }
}