#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "intel_drv.h"
//#include

/* CPU memory barriers; the "memory" clobber also keeps the compiler
 * from reordering memory accesses across them. */
#undef mb
#undef rmb
#undef wmb
#define mb()  asm volatile("mfence" ::: "memory")
#define rmb() asm volatile("lfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")

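/* Everything needed to run one batch: the batch buffer object itself,
 * the list of GEM objects it references, and the start address and
 * length passed to the ring's dispatch_execbuffer(). */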
typedef struct
{
    struct drm_i915_gem_object *batch;
    struct list_head  objects;
    u32    exec_start;
    u32    exec_len;

} batchbuffer_t;

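/* Domain transitions accumulated over all objects of an execbuffer,
 * consumed by i915_gem_execbuffer_flush(). */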
struct change_domains {
    uint32_t invalidate_domains;
    uint32_t flush_domains;
    uint32_t flush_rings;
    uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *  1. Allocated
 *  2. Written by CPU
 *  3. Mapped to GTT
 *  4. Read by GPU
 *  5. Unmapped from GTT
 *  6. Freed
 *
 *  Let's take these a step at a time
 *
 *  1. Allocated
 *      Pages allocated from the kernel may still have
 *      cache contents, so we set them to (CPU, CPU) always.
 *  2. Written by CPU (using pwrite)
 *      The pwrite function calls set_domain (CPU, CPU) and
 *      this function does nothing (as nothing changes)
 *  3. Mapped to GTT
 *      This function asserts that the object is not
 *      currently in any GPU-based read or write domains
 *  4. Read by GPU
 *      i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *      As write_domain is zero, this function adds in the
 *      current read domains (CPU+COMMAND, 0).
 *      flush_domains is set to CPU.
 *      invalidate_domains is set to COMMAND
 *      clflush is run to get data out of the CPU caches
 *      then i915_dev_set_domain calls i915_gem_flush to
 *      emit an MI_FLUSH and drm_agp_chipset_flush
 *  5. Unmapped from GTT
 *      i915_gem_object_unbind calls set_domain (CPU, CPU)
 *      flush_domains and invalidate_domains end up both zero
 *      so no flushing/invalidating happens
 *  6. Freed
 *      yay, done
 *
 * Case 2: The shared render buffer
 *
 *  1. Allocated
 *  2. Mapped to GTT
 *  3. Read/written by GPU
 *  4. set_domain to (CPU,CPU)
 *  5. Read/written by CPU
 *  6. Read/written by GPU
 *
 *  1. Allocated
 *      Same as last example, (CPU, CPU)
 *  2. Mapped to GTT
 *      Nothing changes (assertions find that it is not in the GPU)
 *  3. Read/written by GPU
 *      execbuffer calls set_domain (RENDER, RENDER)
 *      flush_domains gets CPU
 *      invalidate_domains gets GPU
 *      clflush (obj)
 *      MI_FLUSH and drm_agp_chipset_flush
 *  4. set_domain (CPU, CPU)
 *      flush_domains gets GPU
 *      invalidate_domains gets CPU
 *      wait_rendering (obj) to make sure all drawing is complete.
 *      This will include an MI_FLUSH to get the data from GPU
 *      to memory
 *      clflush (obj) to invalidate the CPU cache
 *      Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *  5. Read/written by CPU
 *      cache lines are loaded and dirtied
 *  6. Read/written by GPU
 *      Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *  1. Allocated
 *  2. Written by CPU
 *  3. Read by GPU
 *  4. Updated (written) by CPU again
 *  5. Read by GPU
 *
 *  1. Allocated
 *      (CPU, CPU)
 *  2. Written by CPU
 *      (CPU, CPU)
 *  3. Read by GPU
 *      (CPU+RENDER, 0)
 *      flush_domains = CPU
 *      invalidate_domains = RENDER
 *      clflush (obj)
 *      MI_FLUSH
 *      drm_agp_chipset_flush
 *  4. Updated (written) by CPU again
 *      (CPU, CPU)
 *      flush_domains = 0 (no previous write domain)
 *      invalidate_domains = 0 (no new read domains)
 *  5. Read by GPU
 *      (CPU+RENDER, 0)
 *      flush_domains = CPU
 *      invalidate_domains = RENDER
 *      clflush (obj)
 *      MI_FLUSH
 *      drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
                  struct intel_ring_buffer *ring,
                  struct change_domains *cd)
{
    uint32_t invalidate_domains = 0, flush_domains = 0;

    /*
     * If the object isn't moving to a new write domain,
     * let the object stay in multiple read domains
     */
    if (obj->base.pending_write_domain == 0)
        obj->base.pending_read_domains |= obj->base.read_domains;

    /*
     * Flush the current write domain if
     * the new read domains don't match. Invalidate
     * any read domains which differ from the old
     * write domain
     */
    if (obj->base.write_domain &&
        (((obj->base.write_domain != obj->base.pending_read_domains ||
           obj->ring != ring)) ||
         (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
        flush_domains |= obj->base.write_domain;
        invalidate_domains |=
            obj->base.pending_read_domains & ~obj->base.write_domain;
    }
    /*
     * Invalidate any read caches which may have
     * stale data. That is, any new read domains.
     */
    invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
    if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
        i915_gem_clflush_object(obj);

    if (obj->base.pending_write_domain)
        cd->flips |= atomic_read(&obj->pending_flip);

    /* The actual obj->write_domain will be updated with
     * pending_write_domain after we emit the accumulated flush for all
     * of our domain changes in execbuffers (which clears objects'
     * write_domains).  So if we have a current write domain that we
     * aren't changing, set pending_write_domain to that.
     */
    if (flush_domains == 0 && obj->base.pending_write_domain == 0)
        obj->base.pending_write_domain = obj->base.write_domain;

    cd->invalidate_domains |= invalidate_domains;
    cd->flush_domains |= flush_domains;
    if (flush_domains & I915_GEM_GPU_DOMAINS)
        cd->flush_rings |= obj->ring->id;
    if (invalidate_domains & I915_GEM_GPU_DOMAINS)
        cd->flush_rings |= ring->id;
}
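
/* Illustrative walk-through (not part of the original comment) of
 * Case 3, step 3 above: a constant buffer last written by the CPU
 * (read_domains == CPU, write_domain == CPU) that execbuffer wants the
 * render ring to read (pending_read_domains == RENDER,
 * pending_write_domain == 0):
 *
 *   pending_read_domains |= read_domains                    -> CPU+RENDER
 *   flush_domains        |= write_domain                    -> CPU
 *   invalidate_domains   |= pending_read_domains & ~write_domain
 *                                                           -> RENDER
 *   invalidate_domains   |= pending_read_domains & ~read_domains
 *                                                           -> RENDER
 *
 * The CPU bit in flush_domains triggers the clflush here, and the
 * RENDER bit in invalidate_domains makes i915_gem_execbuffer_flush()
 * below emit the ring flush, matching the comment above.
 */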

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
              uint32_t invalidate_domains,
              uint32_t flush_domains,
              uint32_t flush_rings)
{
    drm_i915_private_t *dev_priv = dev->dev_private;
    int i, ret;

    if (flush_domains & I915_GEM_DOMAIN_CPU)
        intel_gtt_chipset_flush();

    if (flush_domains & I915_GEM_DOMAIN_GTT)
        wmb();

    if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
        for (i = 0; i < I915_NUM_RINGS; i++)
            if (flush_rings & (1 << i)) {
                ret = i915_gem_flush_ring(&dev_priv->ring[i],
                              invalidate_domains,
                              flush_domains);
                if (ret)
                    return ret;
            }
    }

    return 0;
}

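/* Accumulate the domain transitions for every object on @objects and
 * emit whatever flushes/invalidations they require before @ring may
 * use them. */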
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                struct list_head *objects)
{
    struct drm_i915_gem_object *obj;
    struct change_domains cd;
    int ret;

    memset(&cd, 0, sizeof(cd));
    list_for_each_entry(obj, objects, exec_list)
        i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

    if (cd.invalidate_domains | cd.flush_domains) {
        ret = i915_gem_execbuffer_flush(ring->dev,
                        cd.invalidate_domains,
                        cd.flush_domains,
                        cd.flush_rings);
        if (ret)
            return ret;
    }

//    if (cd.flips) {
//        ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
//        if (ret)
//            return ret;
//    }

//    list_for_each_entry(obj, objects, exec_list) {
//        ret = i915_gem_execbuffer_sync_rings(obj, ring);
//        if (ret)
//            return ret;
//    }

    return 0;
}

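/* Commit the pending read/write domains on each object and move it to
 * @ring's active list (and gpu_write_list if it will be written) under
 * the request identified by @seqno. */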
static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
                   struct intel_ring_buffer *ring,
                   u32 seqno)
{
    struct drm_i915_gem_object *obj;

    list_for_each_entry(obj, objects, exec_list) {
        u32 old_read = obj->base.read_domains;
        u32 old_write = obj->base.write_domain;

        obj->base.read_domains = obj->base.pending_read_domains;
        obj->base.write_domain = obj->base.pending_write_domain;
        obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

        i915_gem_object_move_to_active(obj, ring, seqno);
        if (obj->base.write_domain) {
            obj->dirty = 1;
            obj->pending_gpu_write = true;
            list_move_tail(&obj->gpu_write_list,
                       &ring->gpu_write_list);
//            intel_mark_busy(ring->dev, obj);
        }

//        trace_i915_gem_object_change_domain(obj, old_read, old_write);
    }
}

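/* Emit a final flush so the batch's commands complete before the
 * interrupt fires, then add a request (breadcrumb) for the batch; if
 * either step fails, fall back to just advancing the request seqno. */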
static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
                    struct intel_ring_buffer *ring)
{
    struct drm_i915_gem_request *request;
    u32 invalidate;

    /*
     * Ensure that the commands in the batch buffer are
     * finished before the interrupt fires.
     *
     * The sampler always gets flushed on i965 (sigh).
     */
    invalidate = I915_GEM_DOMAIN_COMMAND;
    if (INTEL_INFO(dev)->gen >= 4)
        invalidate |= I915_GEM_DOMAIN_SAMPLER;
    if (ring->flush(ring, invalidate, 0)) {
        i915_gem_next_request_seqno(ring);
        return;
    }

    /* Add a breadcrumb for the completion of the batch buffer */
    request = kzalloc(sizeof(*request), GFP_KERNEL);
    if (request == NULL || i915_add_request(ring, NULL, request)) {
        i915_gem_next_request_seqno(ring);
        kfree(request);
    }
}

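/* Submit a prepared batch: move the listed objects into the GPU domain,
 * dispatch the commands at [exec_start, exec_start + exec_len) and
 * retire them with a request.  Takes dev->struct_mutex itself; note
 * that the @ring argument is currently overridden with the render
 * ring (RCS). */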
int exec_batch(struct drm_device *dev, struct intel_ring_buffer *ring,
               batchbuffer_t *exec)
{
    drm_i915_private_t *dev_priv = dev->dev_private;
    struct drm_i915_gem_object *obj;

    u32 seqno;
    int i;
    int ret;

    ring = &dev_priv->ring[RCS];

    mutex_lock(&dev->struct_mutex);

    list_for_each_entry(obj, &exec->objects, exec_list)
    {
        obj->base.pending_read_domains = 0;
        obj->base.pending_write_domain = 0;
    }

    exec->batch->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

    ret = i915_gem_execbuffer_move_to_gpu(ring, &exec->objects);
    if (ret)
        goto err;

    seqno = i915_gem_next_request_seqno(ring);
//    for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
//        if (seqno < ring->sync_seqno[i]) {
            /* The GPU can not handle its semaphore value wrapping,
             * so every billion or so execbuffers, we need to stall
             * the GPU in order to reset the counters.
             */
//            ret = i915_gpu_idle(dev);
//            if (ret)
//                goto err;

//            BUG_ON(ring->sync_seqno[i]);
//        }
//    };

    ret = ring->dispatch_execbuffer(ring, exec->exec_start, exec->exec_len);
    if (ret)
        goto err;

    i915_gem_execbuffer_move_to_active(&exec->objects, ring, seqno);
    i915_gem_execbuffer_retire_commands(dev, ring);

err:
    mutex_unlock(&dev->struct_mutex);

    return ret;
}
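
/* A minimal usage sketch (not part of the original file).  It assumes
 * the caller has already bound the batch object into the GTT and knows
 * the offset and length of its commands; dev, batch_obj, batch_offset,
 * batch_len and ret are hypothetical names assumed to be in scope.
 *
 *     batchbuffer_t exec;
 *
 *     INIT_LIST_HEAD(&exec.objects);
 *     list_add_tail(&batch_obj->exec_list, &exec.objects);
 *
 *     exec.batch      = batch_obj;
 *     exec.exec_start = batch_offset;
 *     exec.exec_len   = batch_len;
 *
 *     // struct_mutex must not be held here; exec_batch() takes it and
 *     // always dispatches on the render ring regardless of the second
 *     // argument.
 *     ret = exec_batch(dev, NULL, &exec);
 */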