/*
 * Copyright (c) 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"

#define DBG_NO_HW 0
#define DBG_NO_TILING 1
#define DBG_NO_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 0
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0

/* Worst case seems to be 965gm, where we cannot write within a cacheline that
 * is simultaneously being read by the GPU, or within the sampler prefetch.
 * In general, the chipsets seem to have a requirement that sampler offsets
 * be aligned to a cacheline (64 bytes).
 */
#define UPLOAD_ALIGNMENT 128

#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)

#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10

#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)

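/* Illustrative note (not in the original source): these macros assume the
 * mapped pointer is at least 4-byte aligned (in practice it is page aligned),
 * so the two low bits are free to carry the map type. MAKE_CPU_MAP() tags
 * bit 0, MAKE_USER_MAP() tags bits 0 and 1, and MAP() masks the tag off
 * before the pointer is dereferenced, e.g.
 *
 *    void *cpu = MAKE_CPU_MAP(ptr);   // __MAP_TYPE(cpu) == 1, IS_USER_MAP(cpu) == 0
 *    void *usr = MAKE_USER_MAP(ptr);  // __MAP_TYPE(usr) == 3, IS_USER_MAP(usr) != 0
 *    assert(MAP(cpu) == ptr && MAP(usr) == ptr);
 */
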
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))

#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26

static struct kgem_bo *__kgem_freed_bo;

#define bucket(B) (B)->size.pages.bucket
#define num_pages(B) (B)->size.pages.count

#ifdef DEBUG_MEMORY
static void debug_alloc(struct kgem *kgem, size_t size)
{
    kgem->debug_memory.bo_allocs++;
    kgem->debug_memory.bo_bytes += size;
}
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
    debug_alloc(kgem, bytes(bo));
}
#else
#define debug_alloc(k, b)
#define debug_alloc__bo(k, b)
#endif

static uint32_t gem_create(int fd, int num_pages)
{
    struct drm_i915_gem_create create;
    ioctl_t io;

    VG_CLEAR(create);
    create.handle = 0;
    create.size = PAGE_SIZE * num_pages;

    io.handle   = fd;
    io.io_code  = SRV_I915_GEM_CREATE;
    io.input    = &create;
    io.inp_size = sizeof(create);
    io.output   = NULL;
    io.out_size = 0;

    if (call_service(&io) != 0)
        return 0;

    return create.handle;
}

static void gem_close(int fd, uint32_t handle)
{
    struct drm_gem_close close;
    ioctl_t io;

    VG_CLEAR(close);
    close.handle = handle;

    io.handle   = fd;
    io.io_code  = SRV_DRM_GEM_CLOSE;
    io.input    = &close;
    io.inp_size = sizeof(close);
    io.output   = NULL;
    io.out_size = 0;

    call_service(&io);
}

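/* Usage sketch (illustrative, not part of the original file): a feature probe
 * can allocate a single-page object and release it again straight away, much
 * like the commented-out probe in test_has_cacheing() below:
 *
 *    uint32_t handle = gem_create(kgem->fd, 1);
 *    if (handle != 0)
 *        gem_close(kgem->fd, handle);
 */
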
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
    asm("bsr %1,%0"
        : "=r" (word)
        : "rm" (word));
    return word;
#else
    unsigned int v = 0;

    while (word >>= 1)
        v++;

    return v;
#endif
}

constant inline static int cache_bucket(int num_pages)
{
    return __fls(num_pages);
}

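/* Note: __fls() returns the index of the highest set bit, so cache_bucket()
 * maps a size in pages to its power-of-two bucket, e.g. cache_bucket(1) == 0,
 * cache_bucket(2..3) == 1, cache_bucket(4..7) == 2, cache_bucket(8..15) == 3.
 */
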
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
                                      int handle, int num_pages)
{
    assert(num_pages);
    memset(bo, 0, sizeof(*bo));

    bo->refcnt = 1;
    bo->handle = handle;
    bo->target_handle = -1;
    num_pages(bo) = num_pages;
    bucket(bo) = cache_bucket(num_pages);
    bo->reusable = true;
    bo->domain = DOMAIN_CPU;
    list_init(&bo->request);
    list_init(&bo->list);
    list_init(&bo->vma);

    return bo;
}

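/* Note: freed kgem_bo structs are kept on a simple free list headed by
 * __kgem_freed_bo; the link to the next free bo is stored in the first
 * pointer-sized word of the struct itself, which is why the pop below reads
 * "*(struct kgem_bo **)bo". The matching push presumably happens where a bo
 * is finally destroyed, outside this excerpt.
 */
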
static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
{
    struct kgem_bo *bo;

    if (__kgem_freed_bo) {
        bo = __kgem_freed_bo;
        __kgem_freed_bo = *(struct kgem_bo **)bo;
    } else {
        bo = malloc(sizeof(*bo));
        if (bo == NULL)
            return NULL;
    }

    return __kgem_bo_init(bo, handle, num_pages);
}

static int gem_param(struct kgem *kgem, int name)
{
    ioctl_t io;

    drm_i915_getparam_t gp;
    int v = -1; /* No param uses the sign bit, reserve it for errors */

    VG_CLEAR(gp);
    gp.param = name;
    gp.value = &v;

    io.handle   = kgem->fd;
    io.io_code  = SRV_GET_PARAM;
    io.input    = &gp;
    io.inp_size = sizeof(gp);
    io.output   = NULL;
    io.out_size = 0;

    if (call_service(&io) != 0)
        return -1;

    VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
    return v;
}
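/* Typical use of gem_param() (taken from kgem_init() below): feature flags
 * are derived from the returned parameter value, e.g.
 *
 *    kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
 */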
static bool test_has_execbuffer2(struct kgem *kgem)
{
    return 1;
}

static bool test_has_no_reloc(struct kgem *kgem)
{
    if (DBG_NO_FAST_RELOC)
        return false;

    return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}

static bool test_has_handle_lut(struct kgem *kgem)
{
    if (DBG_NO_HANDLE_LUT)
        return false;

    return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
}

static bool test_has_semaphores_enabled(struct kgem *kgem)
{
    FILE *file;
    bool detected = false;
    int ret;

    if (DBG_NO_SEMAPHORES)
        return false;

    ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
    if (ret != -1)
        return ret > 0;

    return detected;
}

static bool __kgem_throttle(struct kgem *kgem)
{
//    if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0)
    return false;

//    return errno == EIO;
}

static bool is_hw_supported(struct kgem *kgem,
                            struct pci_device *dev)
{
    if (DBG_NO_HW)
        return false;

    if (!test_has_execbuffer2(kgem))
        return false;

    if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
        return kgem->has_blt;

    /* Although pre-855gm the GMCH is fubar, it works mostly. So
     * let the user decide through "NoAccel" whether or not to risk
     * hw acceleration.
     */

    if (kgem->gen == 060 && dev->revision < 8) {
        /* pre-production SNB with dysfunctional BLT */
        return false;
    }

    if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
        return kgem->has_blt;

    return true;
}

static bool test_has_relaxed_fencing(struct kgem *kgem)
{
    if (kgem->gen < 040) {
        if (DBG_NO_RELAXED_FENCING)
            return false;

        return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
    } else
        return true;
}

static bool test_has_llc(struct kgem *kgem)
{
    int has_llc = -1;

    if (DBG_NO_LLC)
        return false;

#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
    has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
    if (has_llc == -1) {
        DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
        has_llc = kgem->gen >= 060;
    }

    return has_llc;
}

static bool test_has_cacheing(struct kgem *kgem)
{
    uint32_t handle;
    bool ret = false;

    if (DBG_NO_CACHE_LEVEL)
        return false;

    /* Incoherent blt and sampler hangs the GPU */
    if (kgem->gen == 040)
        return false;

//    handle = gem_create(kgem->fd, 1);
//    if (handle == 0)
//        return false;

//    ret = gem_set_cacheing(kgem->fd, handle, UNCACHED);
//    gem_close(kgem->fd, handle);
    return ret;
}

static bool test_has_userptr(struct kgem *kgem)
{
#if defined(USE_USERPTR)
    uint32_t handle;
    void *ptr;

    if (DBG_NO_USERPTR)
        return false;

    /* Incoherent blt and sampler hangs the GPU */
    if (kgem->gen == 040)
        return false;

    ptr = malloc(PAGE_SIZE);
    handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
    gem_close(kgem->fd, handle);
    free(ptr);

    return handle != 0;
#else
    return false;
#endif
}

static bool test_has_secure_batches(struct kgem *kgem)
{
    if (DBG_NO_SECURE_BATCHES)
        return false;

    return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
}

static bool test_has_pinned_batches(struct kgem *kgem)
{
    if (DBG_NO_PINNED_BATCHES)
        return false;

    return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
}


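/* Note: the function below pre-allocates two pools of batch buffers and asks
 * the kernel to pin them; count[]/size[] request four one-page batches and
 * two four-page batches. If pinning fails, the error path releases whatever
 * was pinned, populates each pool with a single unpinned bo so later code
 * still has something to work with, and reports failure.
 */
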
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
    ioctl_t io;

    int count[2] = { 4, 2 };
    int size[2] = { 1, 4 };
    int n, i;

    if (kgem->wedged)
        return true;

    for (n = 0; n < ARRAY_SIZE(count); n++) {
        for (i = 0; i < count[n]; i++) {
            struct drm_i915_gem_pin pin;
            struct kgem_bo *bo;

            VG_CLEAR(pin);

            pin.handle = gem_create(kgem->fd, size[n]);
            if (pin.handle == 0)
                goto err;

            DBG(("%s: new handle=%d, num_pages=%d\n",
                 __FUNCTION__, pin.handle, size[n]));

            bo = __kgem_bo_alloc(pin.handle, size[n]);
            if (bo == NULL) {
                gem_close(kgem->fd, pin.handle);
                goto err;
            }

            pin.alignment = 0;

            io.handle   = kgem->fd;
            io.io_code  = SRV_I915_GEM_PIN;
            io.input    = &pin;
            io.inp_size = sizeof(pin);
            io.output   = NULL;
            io.out_size = 0;

            if (call_service(&io) != 0) {
                gem_close(kgem->fd, pin.handle);
                goto err;
            }
            bo->presumed_offset = pin.offset;
            debug_alloc__bo(kgem, bo);
            list_add(&bo->list, &kgem->pinned_batches[n]);
        }
    }

    return true;

err:
    for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
        while (!list_is_empty(&kgem->pinned_batches[n])) {
            kgem_bo_destroy(kgem,
                            list_first_entry(&kgem->pinned_batches[n],
                                             struct kgem_bo, list));
        }
    }

    /* For simplicity populate the lists with a single unpinned bo */
    for (n = 0; n < ARRAY_SIZE(count); n++) {
        struct kgem_bo *bo;
        uint32_t handle;

        handle = gem_create(kgem->fd, size[n]);
        if (handle == 0)
            break;

        bo = __kgem_bo_alloc(handle, size[n]);
        if (bo == NULL) {
            gem_close(kgem->fd, handle);
            break;
        }

        debug_alloc__bo(kgem, bo);
        list_add(&bo->list, &kgem->pinned_batches[n]);
    }
    return false;
}


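/* Note: generation numbers in this file are written as octal literals whose
 * leading digit is the major gen, e.g. 020 = gen2, 030 = gen3, 040 = gen4,
 * 060 = gen6 (Sandy Bridge), 070 = gen7; that is why a check such as
 * "(gen >> 3) == 7" selects all gen7 variants.
 */
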
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
    struct drm_i915_gem_get_aperture aperture;
    size_t totalram;
    unsigned half_gpu_max;
    unsigned int i, j;

    DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));

    memset(kgem, 0, sizeof(*kgem));

    kgem->fd = fd;
    kgem->gen = gen;

    list_init(&kgem->requests[0]);
    list_init(&kgem->requests[1]);
    list_init(&kgem->batch_buffers);
    list_init(&kgem->active_buffers);
    list_init(&kgem->flushing);
    list_init(&kgem->large);
    list_init(&kgem->large_inactive);
    list_init(&kgem->snoop);
    list_init(&kgem->scanout);
    for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
        list_init(&kgem->pinned_batches[i]);
    for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
        list_init(&kgem->inactive[i]);
    for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
        for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
            list_init(&kgem->active[i][j]);
    }
    for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
        for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
            list_init(&kgem->vma[i].inactive[j]);
    }
    kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
    kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;

    kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
    DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
         kgem->has_blt));

    kgem->has_relaxed_delta =
        gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
    DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
         kgem->has_relaxed_delta));

    kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
    DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
         kgem->has_relaxed_fencing));

    kgem->has_llc = test_has_llc(kgem);
    DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
         kgem->has_llc));

    kgem->has_cacheing = test_has_cacheing(kgem);
    DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
         kgem->has_cacheing));

    kgem->has_userptr = test_has_userptr(kgem);
    DBG(("%s: has userptr? %d\n", __FUNCTION__,
         kgem->has_userptr));

    kgem->has_no_reloc = test_has_no_reloc(kgem);
    DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
         kgem->has_no_reloc));

    kgem->has_handle_lut = test_has_handle_lut(kgem);
    DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
         kgem->has_handle_lut));

    kgem->has_semaphores = false;
    if (kgem->has_blt && test_has_semaphores_enabled(kgem))
        kgem->has_semaphores = true;
    DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
         kgem->has_semaphores));

    kgem->can_blt_cpu = gen >= 030;
    DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
         kgem->can_blt_cpu));

    kgem->has_secure_batches = test_has_secure_batches(kgem);
    DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
         kgem->has_secure_batches));

    kgem->has_pinned_batches = test_has_pinned_batches(kgem);
    DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
         kgem->has_pinned_batches));

    if (!is_hw_supported(kgem, dev)) {
        printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
        kgem->wedged = 1;
    } else if (__kgem_throttle(kgem)) {
        printf("Detected a hung GPU, disabling acceleration.\n");
        kgem->wedged = 1;
    }

    kgem->batch_size = ARRAY_SIZE(kgem->batch);
    if (gen == 020 && !kgem->has_pinned_batches)
        /* Limited to what we can pin */
        kgem->batch_size = 4*1024;
    if (gen == 022)
        /* 865g cannot handle a batch spanning multiple pages */
        kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
    if ((gen >> 3) == 7)
        kgem->batch_size = 16*1024;
    if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
        kgem->batch_size = 4*1024;

    if (!kgem_init_pinned_batches(kgem) && gen == 020) {
        printf("Unable to reserve memory for GPU, disabling acceleration.\n");
        kgem->wedged = 1;
    }

    DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
         kgem->batch_size));

    kgem->min_alignment = 4;
    if (gen < 040)
        kgem->min_alignment = 64;

#if 0

    kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
    DBG(("%s: half cpu cache %d pages\n", __FUNCTION__,
         kgem->half_cpu_cache_pages));

    kgem->next_request = __kgem_request_alloc(kgem);

    DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
         !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing),
         kgem->has_llc, kgem->has_cacheing, kgem->has_userptr));

    VG_CLEAR(aperture);
    aperture.aper_size = 0;
    (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
    if (aperture.aper_size == 0)
        aperture.aper_size = 64*1024*1024;

    DBG(("%s: aperture size %lld, available now %lld\n",
         __FUNCTION__,
         (long long)aperture.aper_size,
         (long long)aperture.aper_available_size));

    kgem->aperture_total = aperture.aper_size;
    kgem->aperture_high = aperture.aper_size * 3/4;
    kgem->aperture_low = aperture.aper_size * 1/3;
    if (gen < 033) {
        /* Severe alignment penalties */
        kgem->aperture_high /= 2;
        kgem->aperture_low /= 2;
    }
    DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
         kgem->aperture_low, kgem->aperture_low / (1024*1024),
         kgem->aperture_high, kgem->aperture_high / (1024*1024)));

    kgem->aperture_mappable = agp_aperture_size(dev, gen);
    if (kgem->aperture_mappable == 0 ||
        kgem->aperture_mappable > aperture.aper_size)
        kgem->aperture_mappable = aperture.aper_size;
    DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
         kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));

    kgem->buffer_size = 64 * 1024;
    while (kgem->buffer_size < kgem->aperture_mappable >> 10)
        kgem->buffer_size *= 2;
    if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
        kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
    DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
         kgem->buffer_size, kgem->buffer_size / 1024));

    kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
    kgem->max_gpu_size = kgem->max_object_size;
    if (!kgem->has_llc)
        kgem->max_gpu_size = MAX_CACHE_SIZE;

    totalram = total_ram_size();
    if (totalram == 0) {
        DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
             __FUNCTION__));
        totalram = kgem->aperture_total;
    }
    DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
    if (kgem->max_object_size > totalram / 2)
        kgem->max_object_size = totalram / 2;
    if (kgem->max_gpu_size > totalram / 4)
        kgem->max_gpu_size = totalram / 4;

    kgem->max_cpu_size = kgem->max_object_size;

    half_gpu_max = kgem->max_gpu_size / 2;
    kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
    if (kgem->max_copy_tile_size > half_gpu_max)
        kgem->max_copy_tile_size = half_gpu_max;

    if (kgem->has_llc)
        kgem->max_upload_tile_size = kgem->max_copy_tile_size;
    else
        kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
    if (kgem->max_upload_tile_size > half_gpu_max)
        kgem->max_upload_tile_size = half_gpu_max;

    kgem->large_object_size = MAX_CACHE_SIZE;
    if (kgem->large_object_size > kgem->max_gpu_size)
        kgem->large_object_size = kgem->max_gpu_size;

    if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) {
        if (kgem->large_object_size > kgem->max_cpu_size)
            kgem->large_object_size = kgem->max_cpu_size;
    } else
        kgem->max_cpu_size = 0;
    if (DBG_NO_CPU)
        kgem->max_cpu_size = 0;

    DBG(("%s: maximum object size=%d\n",
         __FUNCTION__, kgem->max_object_size));
    DBG(("%s: large object threshold=%d\n",
         __FUNCTION__, kgem->large_object_size));
    DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
         __FUNCTION__,
         kgem->max_gpu_size, kgem->max_cpu_size,
         kgem->max_upload_tile_size, kgem->max_copy_tile_size));

    /* Convert the aperture thresholds to pages */
    kgem->aperture_low /= PAGE_SIZE;
    kgem->aperture_high /= PAGE_SIZE;

    kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
    if ((int)kgem->fence_max < 0)
        kgem->fence_max = 5; /* minimum safe value for all hw */
    DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));

    kgem->batch_flags_base = 0;
    if (kgem->has_no_reloc)
        kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
    if (kgem->has_handle_lut)
        kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
    if (kgem->has_pinned_batches)
        kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;

#endif

}


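/* Note: the dirty walk below stops at the first clean buffer; this relies on
 * dirty buffers being kept at the head of the request's buffer list,
 * presumably maintained where buffers are marked dirty, outside this excerpt.
 */
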
void kgem_clear_dirty(struct kgem *kgem)
{
    struct list * const buffers = &kgem->next_request->buffers;
    struct kgem_bo *bo;

    list_for_each_entry(bo, buffers, request) {
        if (!bo->dirty)
            break;

        bo->dirty = false;
    }
}


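/* The per-bo binding cache is a small singly-linked list embedded in the bo:
 * bo->binding is the head node and further nodes are malloc()ed on demand.
 * kgem_bo_get_binding() looks up the surface-state offset cached for a given
 * format; kgem_bo_set_binding() reuses the first empty slot (offset == 0) or
 * inserts a freshly allocated node right after the embedded head.
 */
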
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
{
    struct kgem_bo_binding *b;

    for (b = &bo->binding; b && b->offset; b = b->next)
        if (format == b->format)
            return b->offset;

    return 0;
}

void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
    struct kgem_bo_binding *b;

    for (b = &bo->binding; b; b = b->next) {
        if (b->offset)
            continue;

        b->offset = offset;
        b->format = format;

        if (b->next)
            b->next->offset = 0;

        return;
    }

    b = malloc(sizeof(*b));
    if (b) {
        b->next = bo->binding.next;
        b->format = format;
        b->offset = offset;
        bo->binding.next = b;
    }
}

uint32_t kgem_add_reloc(struct kgem *kgem,
                        uint32_t pos,
                        struct kgem_bo *bo,
                        uint32_t read_write_domain,
                        uint32_t delta)
{
    return 0;
}

void kgem_reset(struct kgem *kgem)
{

}

void _kgem_submit(struct kgem *kgem)
{
}

struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
{
    struct kgem_bo *bo = NULL;

    return bo;
}

void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{

}