WebSVN – Kolibri OS – Path Comparison – / – /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/drivers/dri/i965/brw_urb.c Rev 5562 and /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/drivers/dri/i965/brw

Regard whitespace Rev 5562 → Rev 5563

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/drivers/dri/i965/brw_urb.c
 ,0 → 1,259
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#define VS 0
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one.  So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5.  There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide its maximum number of
+ * threads it can support based on that. in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct {
+   GLuint min_nr_entries;
+   GLuint preferred_nr_entries;
+   GLuint min_entry_size;
+   GLuint max_entry_size;
+} limits[CS+1] = {
+   { 16, 32, 1, 5 },                    /* vs */
+   { 4, 8,  1, 5 },                     /* gs */
+   { 5, 10,  1, 5 },                    /* clp */
+   { 1, 8,  1, 12 },                    /* sf */
+   { 1, 4,  1, 32 }                     /* cs */
+};
+static bool check_urb_layout(struct brw_context *brw)
+{
+   brw->urb.vs_start = 0;
+   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
+   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+   return brw->urb.cs_start + brw->urb.nr_cs_entries *
+      brw->urb.csize <= brw->urb.size;
+}
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{
+   GLuint csize = brw->curbe.total_size;
+   GLuint vsize = brw->vs.prog_data->base.urb_entry_size;
+   GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+   if (csize < limits[CS].min_entry_size)
+      csize = limits[CS].min_entry_size;
+   if (vsize < limits[VS].min_entry_size)
+      vsize = limits[VS].min_entry_size;
+   if (sfsize < limits[SF].min_entry_size)
+      sfsize = limits[SF].min_entry_size;
+   if (brw->urb.vsize < vsize ||
+       brw->urb.sfsize < sfsize ||
+       brw->urb.csize < csize ||
+       (brw->urb.constrained && (brw->urb.vsize > vsize ||
+                                 brw->urb.sfsize > sfsize ||
+                                 brw->urb.csize > csize))) {
+      brw->urb.csize = csize;
+      brw->urb.sfsize = sfsize;
+      brw->urb.vsize = vsize;
+      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
+      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
+      brw->urb.constrained = 0;
+      if (brw->gen == 5) {
+         brw->urb.nr_vs_entries = 128;
+         brw->urb.nr_sf_entries = 48;
+         if (check_urb_layout(brw)) {
+            goto done;
+         } else {
+            brw->urb.constrained = 1;
+            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+            brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+         }
+      } else if (brw->is_g4x) {
+         brw->urb.nr_vs_entries = 64;
+         if (check_urb_layout(brw)) {
+            goto done;
+         } else {
+            brw->urb.constrained = 1;
+            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+         }
+      }
+      if (!check_urb_layout(brw)) {
+         brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+         brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+         brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+         brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+         brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+         /* Mark us as operating with constrained nr_entries, so that next
+          * time we recalculate we'll resize the fences in the hope of
+          * escaping constrained mode and getting back to normal performance.
+          */
+         brw->urb.constrained = 1;
+         if (!check_urb_layout(brw)) {
+            /* This is impossible, given the maximal sizes of urb
+             * entries and the values for minimum nr of entries
+             * provided above.
+             */
+            printf("couldn't calculate URB layout!\n");
+            exit(1);
+         }
+         if (unlikely(INTEL_DEBUG & (DEBUG_URB|DEBUG_PERF)))
+            printf("URB CONSTRAINED\n");
+      }
+done:
+      if (unlikely(INTEL_DEBUG & DEBUG_URB))
+         printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+                      brw->urb.vs_start,
+                      brw->urb.gs_start,
+                      brw->urb.clip_start,
+                      brw->urb.sf_start,
+                      brw->urb.cs_start,
+                      brw->urb.size);
+      brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+   }
+}
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CURBE_OFFSETS,
+      .cache = (CACHE_NEW_VS_PROG |
+                CACHE_NEW_SF_PROG)
+   },
+   .emit = recalculate_urb_fence
+};
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+   struct brw_urb_fence uf;
+   memset(&uf, 0, sizeof(uf));
+   uf.header.opcode = CMD_URB_FENCE;
+   uf.header.length = sizeof(uf)/4-2;
+   uf.header.vs_realloc = 1;
+   uf.header.gs_realloc = 1;
+   uf.header.clp_realloc = 1;
+   uf.header.sf_realloc = 1;
+   uf.header.vfe_realloc = 1;
+   uf.header.cs_realloc = 1;
+   /* The ordering below is correct, not the layout in the
+    * instruction.
+    *
+    * There are 256/384 urb reg pairs in total.
+    */
+   uf.bits0.vs_fence  = brw->urb.gs_start;
+   uf.bits0.gs_fence  = brw->urb.clip_start;
+   uf.bits0.clp_fence = brw->urb.sf_start;
+   uf.bits1.sf_fence  = brw->urb.cs_start;
+   uf.bits1.cs_fence  = brw->urb.size;
+   /* erratum: URB_FENCE must not cross a 64byte cacheline */
+   if ((brw->batch.used & 15) > 12) {
+      int pad = 16 - (brw->batch.used & 15);
+      do
+         brw->batch.map[brw->batch.used++] = MI_NOOP;
+      while (--pad);
+   }
+   BRW_BATCH_STRUCT(brw, &uf);
+}

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 5562 → Rev 5563