#include "drmP.h"
2
#include "drm.h"
3
#include "i915_drm.h"
4
#include "i915_drv.h"
5
#include "intel_drv.h"
6
//#include
7
 
8
#undef mb
9
#undef rmb
10
#undef wmb
11
#define mb() asm volatile("mfence")
12
#define rmb() asm volatile ("lfence")
13
#define wmb() asm volatile ("sfence")
14
 
15
 
16
/* Minimal batch description consumed by exec_batch(): the batch buffer
 * object itself, the list of GEM objects it references, and the start
 * offset and length of the command stream to execute.
 */
typedef struct
{
    struct drm_i915_gem_object *batch;
    struct list_head  objects;
    u32    exec_start;
    u32    exec_len;

} batchbuffer_t;

/* Cache-domain transitions accumulated over all objects of an
 * execbuffer, so that the required flushes can be emitted once.
 */
struct change_domains {
    uint32_t invalidate_domains;
    uint32_t flush_domains;
    uint32_t flush_rings;
    uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *  1. Allocated
 *  2. Written by CPU
 *  3. Mapped to GTT
 *  4. Read by GPU
 *  5. Unmapped from GTT
 *  6. Freed
 *
 *  Let's take these a step at a time
 *
 *  1. Allocated
 *      Pages allocated from the kernel may still have
 *      cache contents, so we set them to (CPU, CPU) always.
 *  2. Written by CPU (using pwrite)
 *      The pwrite function calls set_domain (CPU, CPU) and
 *      this function does nothing (as nothing changes)
 *  3. Mapped to GTT
 *      This function asserts that the object is not
 *      currently in any GPU-based read or write domains
 *  4. Read by GPU
 *      i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *      As write_domain is zero, this function adds in the
 *      current read domains (CPU+COMMAND, 0).
 *      flush_domains is set to CPU.
 *      invalidate_domains is set to COMMAND.
 *      clflush is run to get data out of the CPU caches
 *      then i915_dev_set_domain calls i915_gem_flush to
 *      emit an MI_FLUSH and drm_agp_chipset_flush
 *  5. Unmapped from GTT
 *      i915_gem_object_unbind calls set_domain (CPU, CPU)
 *      flush_domains and invalidate_domains end up both zero
 *      so no flushing/invalidating happens
 *  6. Freed
 *      yay, done
 *
 * Case 2: The shared render buffer
 *
 *  1. Allocated
 *  2. Mapped to GTT
 *  3. Read/written by GPU
 *  4. set_domain to (CPU,CPU)
 *  5. Read/written by CPU
 *  6. Read/written by GPU
 *
 *  1. Allocated
 *      Same as last example, (CPU, CPU)
 *  2. Mapped to GTT
 *      Nothing changes (assertions find that it is not in the GPU)
 *  3. Read/written by GPU
 *      execbuffer calls set_domain (RENDER, RENDER)
 *      flush_domains gets CPU
 *      invalidate_domains gets GPU
 *      clflush (obj)
 *      MI_FLUSH and drm_agp_chipset_flush
 *  4. set_domain (CPU, CPU)
 *      flush_domains gets GPU
 *      invalidate_domains gets CPU
 *      wait_rendering (obj) to make sure all drawing is complete.
 *      This will include an MI_FLUSH to get the data from GPU
 *      to memory
 *      clflush (obj) to invalidate the CPU cache
 *      Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *  5. Read/written by CPU
 *      cache lines are loaded and dirtied
 *  6. Read/written by GPU
 *      Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *  1. Allocated
 *  2. Written by CPU
 *  3. Read by GPU
 *  4. Updated (written) by CPU again
 *  5. Read by GPU
 *
 *  1. Allocated
 *      (CPU, CPU)
 *  2. Written by CPU
 *      (CPU, CPU)
 *  3. Read by GPU
 *      (CPU+RENDER, 0)
 *      flush_domains = CPU
 *      invalidate_domains = RENDER
 *      clflush (obj)
 *      MI_FLUSH
 *      drm_agp_chipset_flush
 *  4. Updated (written) by CPU again
 *      (CPU, CPU)
 *      flush_domains = 0 (no previous write domain)
 *      invalidate_domains = 0 (no new read domains)
 *  5. Read by GPU
 *      (CPU+RENDER, 0)
 *      flush_domains = CPU
 *      invalidate_domains = RENDER
 *      clflush (obj)
 *      MI_FLUSH
 *      drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
                  struct intel_ring_buffer *ring,
                  struct change_domains *cd)
{
    uint32_t invalidate_domains = 0, flush_domains = 0;

    /*
     * If the object isn't moving to a new write domain,
     * let the object stay in multiple read domains
     */
    if (obj->base.pending_write_domain == 0)
        obj->base.pending_read_domains |= obj->base.read_domains;

    /*
     * Flush the current write domain if
     * the new read domains don't match. Invalidate
     * any read domains which differ from the old
     * write domain
     */
    if (obj->base.write_domain &&
        (((obj->base.write_domain != obj->base.pending_read_domains ||
           obj->ring != ring)) ||
         (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
        flush_domains |= obj->base.write_domain;
        invalidate_domains |=
            obj->base.pending_read_domains & ~obj->base.write_domain;
    }
    /*
     * Invalidate any read caches which may have
     * stale data. That is, any new read domains.
     */
    invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
    if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
        i915_gem_clflush_object(obj);

    if (obj->base.pending_write_domain)
        cd->flips |= atomic_read(&obj->pending_flip);

    /* The actual obj->write_domain will be updated with
     * pending_write_domain after we emit the accumulated flush for all
     * of our domain changes in execbuffers (which clears objects'
     * write_domains).  So if we have a current write domain that we
     * aren't changing, set pending_write_domain to that.
     */
    if (flush_domains == 0 && obj->base.pending_write_domain == 0)
        obj->base.pending_write_domain = obj->base.write_domain;

    cd->invalidate_domains |= invalidate_domains;
    cd->flush_domains |= flush_domains;
    if (flush_domains & I915_GEM_GPU_DOMAINS)
        cd->flush_rings |= obj->ring->id;
    if (invalidate_domains & I915_GEM_GPU_DOMAINS)
        cd->flush_rings |= ring->id;
}
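
/*
 * A minimal, self-contained model (not part of the original driver) of
 * the flush/invalidate rules walked through in the comment above, kept
 * out of the build with #if 0.  The helper name model_domain_change()
 * and its parameters are made up for illustration only; it ignores the
 * ring-change and fenced-access conditions that the real
 * i915_gem_object_set_to_gpu_domain() also checks.
 */
#if 0
static void model_domain_change(uint32_t read_domains, uint32_t write_domain,
                                uint32_t pending_read_domains,
                                uint32_t pending_write_domain,
                                uint32_t *flush, uint32_t *invalidate)
{
    /* No new write domain: the object may stay in its old read
     * domains in addition to the new ones. */
    if (pending_write_domain == 0)
        pending_read_domains |= read_domains;

    /* Flush the old write domain when the new reader set differs
     * from it. */
    *flush = 0;
    if (write_domain && write_domain != pending_read_domains)
        *flush = write_domain;

    /* Invalidate every read domain the object is not already in. */
    *invalidate = pending_read_domains & ~read_domains;
}

/* Case 1, step 4 ("Read by GPU"): the object is (CPU, CPU) and
 * execbuffer asks for (COMMAND, 0), giving flush = CPU and
 * invalidate = COMMAND, as in the walk-through.  Case 3, step 4 (a CPU
 * rewrite of a buffer already in (CPU, CPU)) gives flush = 0 and
 * invalidate = 0, i.e. no work at all.
 */
#endif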

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
              uint32_t invalidate_domains,
              uint32_t flush_domains,
              uint32_t flush_rings)
{
    drm_i915_private_t *dev_priv = dev->dev_private;
    int i, ret;

    if (flush_domains & I915_GEM_DOMAIN_CPU)
        intel_gtt_chipset_flush();

    if (flush_domains & I915_GEM_DOMAIN_GTT)
        wmb();

    if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
        for (i = 0; i < I915_NUM_RINGS; i++)
            if (flush_rings & (1 << i)) {
                ret = i915_gem_flush_ring(&dev_priv->ring[i],
                              invalidate_domains,
                              flush_domains);
                if (ret)
                    return ret;
            }
    }

    return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                struct list_head *objects)
{
    struct drm_i915_gem_object *obj;
    struct change_domains cd;
    int ret;

    memset(&cd, 0, sizeof(cd));
    list_for_each_entry(obj, objects, exec_list)
        i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

    if (cd.invalidate_domains | cd.flush_domains) {
        ret = i915_gem_execbuffer_flush(ring->dev,
                        cd.invalidate_domains,
                        cd.flush_domains,
                        cd.flush_rings);
        if (ret)
            return ret;
    }

//    if (cd.flips) {
//        ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
//        if (ret)
//            return ret;
//    }

//    list_for_each_entry(obj, objects, exec_list) {
//        ret = i915_gem_execbuffer_sync_rings(obj, ring);
//        if (ret)
//            return ret;
//    }

    return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
                   struct intel_ring_buffer *ring,
                   u32 seqno)
{
    struct drm_i915_gem_object *obj;

    list_for_each_entry(obj, objects, exec_list) {
        u32 old_read = obj->base.read_domains;
        u32 old_write = obj->base.write_domain;

        obj->base.read_domains = obj->base.pending_read_domains;
        obj->base.write_domain = obj->base.pending_write_domain;
        obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

        i915_gem_object_move_to_active(obj, ring, seqno);
        if (obj->base.write_domain) {
            obj->dirty = 1;
            obj->pending_gpu_write = true;
            list_move_tail(&obj->gpu_write_list,
                       &ring->gpu_write_list);
//            intel_mark_busy(ring->dev, obj);
        }

//        trace_i915_gem_object_change_domain(obj, old_read, old_write);
    }
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
                    struct intel_ring_buffer *ring)
{
    struct drm_i915_gem_request *request;
    u32 invalidate;

    /*
     * Ensure that the commands in the batch buffer are
     * finished before the interrupt fires.
     *
     * The sampler always gets flushed on i965 (sigh).
     */
    invalidate = I915_GEM_DOMAIN_COMMAND;
    if (INTEL_INFO(dev)->gen >= 4)
        invalidate |= I915_GEM_DOMAIN_SAMPLER;
    if (ring->flush(ring, invalidate, 0)) {
        i915_gem_next_request_seqno(ring);
        return;
    }

    /* Add a breadcrumb for the completion of the batch buffer */
    request = kzalloc(sizeof(*request), GFP_KERNEL);
    if (request == NULL || i915_add_request(ring, NULL, request)) {
        i915_gem_next_request_seqno(ring);
        kfree(request);
    }
}


int exec_batch(struct drm_device *dev, struct intel_ring_buffer *ring,
               batchbuffer_t *exec)
{
    drm_i915_private_t *dev_priv = dev->dev_private;
    struct drm_i915_gem_object *obj;

    u32 seqno;
    int i;
    int ret;

    ring = &dev_priv->ring[RCS];

    mutex_lock(&dev->struct_mutex);

    list_for_each_entry(obj, &exec->objects, exec_list)
    {
        obj->base.pending_read_domains = 0;
        obj->base.pending_write_domain = 0;
    }

    exec->batch->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

    ret = i915_gem_execbuffer_move_to_gpu(ring, &exec->objects);
    if (ret)
        goto err;

    seqno = i915_gem_next_request_seqno(ring);
//    for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
//        if (seqno < ring->sync_seqno[i]) {
            /* The GPU can not handle its semaphore value wrapping,
             * so every billion or so execbuffers, we need to stall
             * the GPU in order to reset the counters.
             */
//            ret = i915_gpu_idle(dev);
//            if (ret)
//                goto err;

//            BUG_ON(ring->sync_seqno[i]);
//        }
//    };

    ret = ring->dispatch_execbuffer(ring, exec->exec_start, exec->exec_len);
    if (ret)
        goto err;

    i915_gem_execbuffer_move_to_active(&exec->objects, ring, seqno);
    i915_gem_execbuffer_retire_commands(dev, ring);

err:
    mutex_unlock(&dev->struct_mutex);

    return ret;
}
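
/*
 * A hypothetical caller of exec_batch(), kept out of the build with
 * #if 0.  It only exercises the types declared in this file; where the
 * batch object, its referenced objects and the drm_device come from is
 * assumed to be handled elsewhere in the KolibriOS i915 port.
 */
#if 0
static int submit_example(struct drm_device *dev,
                          struct drm_i915_gem_object *batch_obj,
                          u32 batch_offset, u32 batch_len)
{
    batchbuffer_t exec;

    /* Describe the batch: the command object itself plus the list of
     * objects it references, linked through their exec_list heads. */
    exec.batch      = batch_obj;
    exec.exec_start = batch_offset;   /* start address passed to dispatch_execbuffer() */
    exec.exec_len   = batch_len;      /* length of the command stream */
    INIT_LIST_HEAD(&exec.objects);
    list_add_tail(&batch_obj->exec_list, &exec.objects);

    /* exec_batch() forces the render ring internally, so the ring
     * argument is effectively ignored. */
    return exec_batch(dev, NULL, &exec);
}
#endif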