Rev 3254 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 3254 | Rev 3255 | ||
---|---|---|---|
Line 49... | Line 49... | ||
49 | #define DBG_NO_PINNED_BATCHES 0 |
49 | #define DBG_NO_PINNED_BATCHES 0 |
50 | #define DBG_NO_FAST_RELOC 0 |
50 | #define DBG_NO_FAST_RELOC 0 |
51 | #define DBG_NO_HANDLE_LUT 0 |
51 | #define DBG_NO_HANDLE_LUT 0 |
52 | #define DBG_DUMP 0 |
52 | #define DBG_DUMP 0 |
Line -... | Line 53... | ||
/* Worst case seems to be 965gm where we cannot write within a cacheline that
 * is being simultaneously being read by the GPU, or within the sampler
 * prefetch. In general, the chipsets seem to have a requirement that sampler
 * offsets be aligned to a cacheline (64 bytes).
 */
#define UPLOAD_ALIGNMENT 128

/* Round x up to the next whole page. */
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
/* Number of pages needed to hold x bytes (rounds up). */
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
53 | 63 | ||
54 | #define MAX_GTT_VMA_CACHE 512 |
64 | #define MAX_GTT_VMA_CACHE 512 |
55 | #define MAX_CPU_VMA_CACHE INT16_MAX |
65 | #define MAX_CPU_VMA_CACHE INT16_MAX |
Line 56... | Line 66... | ||
56 | #define MAP_PRESERVE_TIME 10 |
66 | #define MAP_PRESERVE_TIME 10 |
Line 70... | Line 80... | ||
70 | #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 |
80 | #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 |
71 | #define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 |
81 | #define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 |
72 | #define LOCAL_I915_PARAM_HAS_NO_RELOC 25 |
82 | #define LOCAL_I915_PARAM_HAS_NO_RELOC 25 |
73 | #define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 |
83 | #define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 |
Line -... | Line 84... | ||
- | 84 | ||
Line -... | Line 85... | ||
/* Cache of freed struct kgem_bo allocations: a singly-linked free list
 * threaded through the first word of each freed struct (popped by
 * __kgem_bo_alloc to avoid hitting malloc on hot paths). */
static struct kgem_bo *__kgem_freed_bo;

/* Shorthand accessors for a bo's size-in-pages and its cache bucket index. */
#define bucket(B) (B)->size.pages.bucket
#define num_pages(B) (B)->size.pages.count
- | 89 | ||
#ifdef DEBUG_MEMORY
/* Accumulate bo allocation statistics (count and total bytes) for
 * memory-usage debugging. */
static void debug_alloc(struct kgem *kgem, size_t size)
{
	kgem->debug_memory.bo_allocs++;
	kgem->debug_memory.bo_bytes += size;
}
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
	debug_alloc(kgem, bytes(bo));
}
#else
/* Compile to nothing when memory debugging is disabled. */
#define debug_alloc(k, b)
#define debug_alloc__bo(k, b)
#endif
|
- | 104 | ||
- | 105 | static uint32_t gem_create(int fd, int num_pages) |
|
- | 106 | { |
|
- | 107 | struct drm_i915_gem_create create; |
|
- | 108 | ioctl_t io; |
|
- | 109 | ||
- | 110 | VG_CLEAR(create); |
|
- | 111 | create.handle = 0; |
|
- | 112 | create.size = PAGE_SIZE * num_pages; |
|
- | 113 | ||
- | 114 | io.handle = fd; |
|
- | 115 | io.io_code = SRV_I915_GEM_CREATE; |
|
- | 116 | io.input = &create; |
|
- | 117 | io.inp_size = sizeof(create); |
|
- | 118 | io.output = NULL; |
|
- | 119 | io.out_size = 0; |
|
- | 120 | ||
- | 121 | if (call_service(&io)!=0) |
|
- | 122 | return 0; |
|
- | 123 | ||
- | 124 | return create.handle; |
|
- | 125 | } |
|
- | 126 | ||
- | 127 | static void gem_close(int fd, uint32_t handle) |
|
- | 128 | { |
|
- | 129 | struct drm_gem_close close; |
|
- | 130 | ioctl_t io; |
|
- | 131 | ||
- | 132 | VG_CLEAR(close); |
|
- | 133 | close.handle = handle; |
|
- | 134 | ||
- | 135 | io.handle = fd; |
|
- | 136 | io.io_code = SRV_DRM_GEM_CLOSE; |
|
- | 137 | io.input = &close; |
|
- | 138 | io.inp_size = sizeof(close); |
|
- | 139 | io.output = NULL; |
|
- | 140 | io.out_size = 0; |
|
- | 141 | ||
- | 142 | call_service(&io); |
|
- | 143 | } |
|
- | 144 | ||
- | 145 | constant inline static unsigned long __fls(unsigned long word) |
|
- | 146 | { |
|
- | 147 | #if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__)) |
|
- | 148 | asm("bsr %1,%0" |
|
- | 149 | : "=r" (word) |
|
- | 150 | : "rm" (word)); |
|
- | 151 | return word; |
|
- | 152 | #else |
|
- | 153 | unsigned int v = 0; |
|
- | 154 | ||
- | 155 | while (word >>= 1) |
|
- | 156 | v++; |
|
- | 157 | ||
- | 158 | return v; |
|
- | 159 | #endif |
|
- | 160 | } |
|
- | 161 | ||
- | 162 | constant inline static int cache_bucket(int num_pages) |
|
- | 163 | { |
|
- | 164 | return __fls(num_pages); |
|
- | 165 | } |
|
- | 166 | ||
- | 167 | static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo, |
|
- | 168 | int handle, int num_pages) |
|
- | 169 | { |
|
- | 170 | assert(num_pages); |
|
- | 171 | memset(bo, 0, sizeof(*bo)); |
|
- | 172 | ||
- | 173 | bo->refcnt = 1; |
|
- | 174 | bo->handle = handle; |
|
- | 175 | bo->target_handle = -1; |
|
- | 176 | num_pages(bo) = num_pages; |
|
- | 177 | bucket(bo) = cache_bucket(num_pages); |
|
- | 178 | bo->reusable = true; |
|
- | 179 | bo->domain = DOMAIN_CPU; |
|
- | 180 | list_init(&bo->request); |
|
- | 181 | list_init(&bo->list); |
|
- | 182 | list_init(&bo->vma); |
|
- | 183 | ||
- | 184 | return bo; |
|
- | 185 | } |
|
- | 186 | ||
- | 187 | static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages) |
|
- | 188 | { |
|
- | 189 | struct kgem_bo *bo; |
|
- | 190 | ||
- | 191 | if (__kgem_freed_bo) { |
|
- | 192 | bo = __kgem_freed_bo; |
|
- | 193 | __kgem_freed_bo = *(struct kgem_bo **)bo; |
|
- | 194 | } else { |
|
- | 195 | bo = malloc(sizeof(*bo)); |
|
- | 196 | if (bo == NULL) |
|
- | 197 | return NULL; |
|
- | 198 | } |
|
- | 199 | ||
Line 74... | Line 200... | ||
74 | 200 | return __kgem_bo_init(bo, handle, num_pages); |
|
75 | 201 | } |
|
76 | 202 | ||
Line 97... | Line 223... | ||
97 | 223 | ||
98 | VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); |
224 | VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); |
99 | return v; |
225 | return v; |
Line -... | Line 226... | ||
- | 226 | } |
|
- | 227 | ||
/* Whether the kernel supports DRM_IOCTL_I915_GEM_EXECBUFFER2.  On this
 * backend it is unconditionally available, so no ioctl probe is needed. */
static bool test_has_execbuffer2(struct kgem *kgem)
{
	(void)kgem;	/* parameter kept for API symmetry with the other probes */
	return true;	/* was `return 1;` — use the bool literal */
}
101 | 232 | ||
102 | static bool test_has_no_reloc(struct kgem *kgem) |
233 | static bool test_has_no_reloc(struct kgem *kgem) |
103 | { |
234 | { |
Line 129... | Line 260... | ||
129 | return ret > 0; |
260 | return ret > 0; |
Line 130... | Line 261... | ||
130 | 261 | ||
131 | return detected; |
262 | return detected; |
Line -... | Line 263... | ||
- | 263 | } |
|
- | 264 | ||
/* Report whether the GPU is wedged via the throttle ioctl.
 * DRM_IOCTL_I915_GEM_THROTTLE is not wired up in this port, so the GPU
 * is never reported as hung from here (originally: throttle, then treat
 * errno == EIO as wedged). */
static bool __kgem_throttle(struct kgem *kgem)
{
	(void)kgem;
	return false;
}
|
- | 272 | ||
- | 273 | static bool is_hw_supported(struct kgem *kgem, |
|
- | 274 | struct pci_device *dev) |
|
- | 275 | { |
|
- | 276 | if (DBG_NO_HW) |
|
- | 277 | return false; |
|
- | 278 | ||
- | 279 | if (!test_has_execbuffer2(kgem)) |
|
- | 280 | return false; |
|
- | 281 | ||
- | 282 | if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */ |
|
- | 283 | return kgem->has_blt; |
|
- | 284 | ||
- | 285 | /* Although pre-855gm the GMCH is fubar, it works mostly. So |
|
- | 286 | * let the user decide through "NoAccel" whether or not to risk |
|
- | 287 | * hw acceleration. |
|
- | 288 | */ |
|
- | 289 | ||
- | 290 | if (kgem->gen == 060 && dev->revision < 8) { |
|
- | 291 | /* pre-production SNB with dysfunctional BLT */ |
|
- | 292 | return false; |
|
- | 293 | } |
|
- | 294 | ||
- | 295 | if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */ |
|
- | 296 | return kgem->has_blt; |
|
- | 297 | ||
Line 132... | Line 298... | ||
132 | } |
298 | return true; |
133 | 299 | } |
|
134 | 300 | ||
135 | static bool test_has_relaxed_fencing(struct kgem *kgem) |
301 | static bool test_has_relaxed_fencing(struct kgem *kgem) |
Line 221... | Line 387... | ||
221 | 387 | ||
222 | return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; |
388 | return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; |
Line -... | Line 389... | ||
- | 389 | } |
|
- | 390 | ||
- | 391 | ||
- | 392 | static bool kgem_init_pinned_batches(struct kgem *kgem) |
|
- | 393 | { |
|
- | 394 | ioctl_t io; |
|
- | 395 | ||
- | 396 | int count[2] = { 4, 2 }; |
|
- | 397 | int size[2] = { 1, 4 }; |
|
- | 398 | int n, i; |
|
- | 399 | ||
- | 400 | if (kgem->wedged) |
|
- | 401 | return true; |
|
- | 402 | ||
- | 403 | for (n = 0; n < ARRAY_SIZE(count); n++) { |
|
- | 404 | for (i = 0; i < count[n]; i++) { |
|
- | 405 | struct drm_i915_gem_pin pin; |
|
- | 406 | struct kgem_bo *bo; |
|
- | 407 | ||
- | 408 | VG_CLEAR(pin); |
|
- | 409 | ||
- | 410 | pin.handle = gem_create(kgem->fd, size[n]); |
|
- | 411 | if (pin.handle == 0) |
|
- | 412 | goto err; |
|
- | 413 | ||
- | 414 | DBG(("%s: new handle=%d, num_pages=%d\n", |
|
- | 415 | __FUNCTION__, pin.handle, size[n])); |
|
- | 416 | ||
- | 417 | bo = __kgem_bo_alloc(pin.handle, size[n]); |
|
- | 418 | if (bo == NULL) { |
|
- | 419 | gem_close(kgem->fd, pin.handle); |
|
- | 420 | goto err; |
|
- | 421 | } |
|
- | 422 | ||
- | 423 | pin.alignment = 0; |
|
- | 424 | ||
- | 425 | io.handle = kgem->fd; |
|
- | 426 | io.io_code = SRV_I915_GEM_PIN; |
|
- | 427 | io.input = &pin; |
|
- | 428 | io.inp_size = sizeof(pin); |
|
- | 429 | io.output = NULL; |
|
- | 430 | io.out_size = 0; |
|
- | 431 | ||
- | 432 | if (call_service(&io)!=0){ |
|
- | 433 | gem_close(kgem->fd, pin.handle); |
|
- | 434 | goto err; |
|
- | 435 | } |
|
- | 436 | bo->presumed_offset = pin.offset; |
|
- | 437 | debug_alloc__bo(kgem, bo); |
|
- | 438 | list_add(&bo->list, &kgem->pinned_batches[n]); |
|
- | 439 | } |
|
- | 440 | } |
|
- | 441 | ||
- | 442 | return true; |
|
- | 443 | ||
- | 444 | err: |
|
- | 445 | for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) { |
|
- | 446 | while (!list_is_empty(&kgem->pinned_batches[n])) { |
|
- | 447 | kgem_bo_destroy(kgem, |
|
- | 448 | list_first_entry(&kgem->pinned_batches[n], |
|
- | 449 | struct kgem_bo, list)); |
|
- | 450 | } |
|
- | 451 | } |
|
- | 452 | ||
- | 453 | /* For simplicity populate the lists with a single unpinned bo */ |
|
- | 454 | for (n = 0; n < ARRAY_SIZE(count); n++) { |
|
- | 455 | struct kgem_bo *bo; |
|
- | 456 | uint32_t handle; |
|
- | 457 | ||
- | 458 | handle = gem_create(kgem->fd, size[n]); |
|
- | 459 | if (handle == 0) |
|
- | 460 | break; |
|
- | 461 | ||
- | 462 | bo = __kgem_bo_alloc(handle, size[n]); |
|
- | 463 | if (bo == NULL) { |
|
- | 464 | gem_close(kgem->fd, handle); |
|
- | 465 | break; |
|
- | 466 | } |
|
- | 467 | ||
- | 468 | debug_alloc__bo(kgem, bo); |
|
- | 469 | list_add(&bo->list, &kgem->pinned_batches[n]); |
|
- | 470 | } |
|
- | 471 | return false; |
|
Line 223... | Line 472... | ||
223 | } |
472 | } |
224 | 473 | ||
225 | 474 | ||
226 | 475 | ||
Line 257... | Line 506... | ||
257 | } |
506 | } |
258 | for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { |
507 | for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { |
259 | for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) |
508 | for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) |
260 | list_init(&kgem->vma[i].inactive[j]); |
509 | list_init(&kgem->vma[i].inactive[j]); |
261 | } |
510 | } |
262 | - | ||
263 | kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; |
511 | kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; |
264 | kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; |
512 | kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; |
Line 265... | Line 513... | ||
265 | 513 | ||
266 | kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0; |
514 | kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0; |
Line 270... | Line 518... | ||
270 | kgem->has_relaxed_delta = |
518 | kgem->has_relaxed_delta = |
271 | gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0; |
519 | gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0; |
272 | DBG(("%s: has relaxed delta? %d\n", __FUNCTION__, |
520 | DBG(("%s: has relaxed delta? %d\n", __FUNCTION__, |
273 | kgem->has_relaxed_delta)); |
521 | kgem->has_relaxed_delta)); |
Line 274... | Line -... | ||
274 | - | ||
275 | 522 | ||
276 | kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem); |
523 | kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem); |
277 | DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__, |
524 | DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__, |
Line 278... | Line 525... | ||
278 | kgem->has_relaxed_fencing)); |
525 | kgem->has_relaxed_fencing)); |
Line 313... | Line 560... | ||
313 | 560 | ||
314 | kgem->has_pinned_batches = test_has_pinned_batches(kgem); |
561 | kgem->has_pinned_batches = test_has_pinned_batches(kgem); |
315 | DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__, |
562 | DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__, |
Line 316... | Line -... | ||
316 | kgem->has_pinned_batches)); |
- | |
317 | - | ||
318 | #if 0 |
563 | kgem->has_pinned_batches)); |
319 | - | ||
320 | if (!is_hw_supported(kgem, dev)) { |
564 | |
321 | xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, |
565 | if (!is_hw_supported(kgem, dev)) { |
322 | "Detected unsupported/dysfunctional hardware, disabling acceleration.\n"); |
566 | printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n"); |
323 | kgem->wedged = 1; |
- | |
324 | } else if (__kgem_throttle(kgem)) { |
567 | kgem->wedged = 1; |
325 | xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, |
568 | } else if (__kgem_throttle(kgem)) { |
326 | "Detected a hung GPU, disabling acceleration.\n"); |
569 | printf("Detected a hung GPU, disabling acceleration.\n"); |
Line 327... | Line 570... | ||
327 | kgem->wedged = 1; |
570 | kgem->wedged = 1; |
328 | } |
571 | } |
Line 338... | Line 581... | ||
338 | kgem->batch_size = 16*1024; |
581 | kgem->batch_size = 16*1024; |
339 | if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) |
582 | if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) |
340 | kgem->batch_size = 4*1024; |
583 | kgem->batch_size = 4*1024; |
Line 341... | Line 584... | ||
341 | 584 | ||
342 | if (!kgem_init_pinned_batches(kgem) && gen == 020) { |
- | |
343 | xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, |
585 | if (!kgem_init_pinned_batches(kgem) && gen == 020) { |
344 | "Unable to reserve memory for GPU, disabling acceleration.\n"); |
586 | printf("Unable to reserve memory for GPU, disabling acceleration.\n"); |
345 | kgem->wedged = 1; |
587 | kgem->wedged = 1; |
Line 346... | Line 588... | ||
346 | } |
588 | } |
347 | 589 | ||
Line 348... | Line 590... | ||
348 | DBG(("%s: maximum batch size? %d\n", __FUNCTION__, |
590 | DBG(("%s: maximum batch size? %d\n", __FUNCTION__, |
349 | kgem->batch_size)); |
591 | kgem->batch_size)); |
350 | 592 | ||
Line -... | Line 593... | ||
- | 593 | kgem->min_alignment = 4; |
|
- | 594 | if (gen < 040) |
|
351 | kgem->min_alignment = 4; |
595 | kgem->min_alignment = 64; |
352 | if (gen < 040) |
596 | |
353 | kgem->min_alignment = 64; |
597 | #if 0 |
Line 354... | Line 598... | ||
354 | 598 |