Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * |
||
4 | * Copyright (C) 2012-2013 LunarG, Inc. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included |
||
14 | * in all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
22 | * DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: |
||
25 | * Chia-I Wu |
||
26 | */ |
||
27 | |||
28 | #include "util/u_surface.h" |
||
29 | #include "util/u_transfer.h" |
||
30 | #include "util/u_format_etc.h" |
||
31 | |||
32 | #include "ilo_cp.h" |
||
33 | #include "ilo_context.h" |
||
34 | #include "ilo_resource.h" |
||
35 | #include "ilo_state.h" |
||
36 | #include "ilo_transfer.h" |
||
37 | |||
38 | static bool |
||
39 | is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush) |
||
40 | { |
||
41 | const bool referenced = intel_bo_references(ilo->cp->bo, bo); |
||
42 | |||
43 | if (need_flush) |
||
44 | *need_flush = referenced; |
||
45 | |||
46 | if (referenced) |
||
47 | return true; |
||
48 | |||
49 | return intel_bo_is_busy(bo); |
||
50 | } |
||
51 | |||
52 | static bool |
||
53 | map_bo_for_transfer(struct ilo_context *ilo, struct intel_bo *bo, |
||
54 | const struct ilo_transfer *xfer) |
||
55 | { |
||
56 | int err; |
||
57 | |||
58 | switch (xfer->method) { |
||
59 | case ILO_TRANSFER_MAP_CPU: |
||
60 | err = intel_bo_map(bo, (xfer->base.usage & PIPE_TRANSFER_WRITE)); |
||
61 | break; |
||
62 | case ILO_TRANSFER_MAP_GTT: |
||
63 | err = intel_bo_map_gtt(bo); |
||
64 | break; |
||
65 | case ILO_TRANSFER_MAP_UNSYNC: |
||
66 | err = intel_bo_map_unsynchronized(bo); |
||
67 | break; |
||
68 | default: |
||
69 | assert(!"unknown mapping method"); |
||
70 | err = -1; |
||
71 | break; |
||
72 | } |
||
73 | |||
74 | return !err; |
||
75 | } |
||
76 | |||
/**
 * Choose the best mapping method, depending on the transfer usage and whether
 * the bo is busy.
 *
 * Returns false only when the caller asked for PIPE_TRANSFER_DONTBLOCK and
 * mapping would stall; otherwise xfer->method is set and true is returned.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   const unsigned usage = xfer->base.usage;
   /* prefer map() when there is the last-level cache */
   const bool prefer_cpu =
      (ilo->dev->has_llc || (usage & PIPE_TRANSFER_READ));
   struct ilo_texture *tex;
   struct ilo_buffer *buf;
   struct intel_bo *bo;
   bool tiled, need_flush;

   /* exactly one of tex/buf is set, depending on the resource target */
   if (res->target == PIPE_BUFFER) {
      tex = NULL;

      buf = ilo_buffer(res);
      bo = buf->bo;
      tiled = false;
   }
   else {
      buf = NULL;

      tex = ilo_texture(res);
      bo = tex->bo;
      tiled = (tex->tiling != INTEL_TILING_NONE);
   }

   /* choose between mapping through CPU or GTT */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      /* we do not want fencing */
      if (tiled || prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }
   else {
      /* GTT maps give a linear view of tiled surfaces */
      if (!tiled && prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         /* unsynchronized gtt mapping does not stall */
         xfer->method = ILO_TRANSFER_MAP_UNSYNC;
         will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard old bo and allocate a new one for mapping */
         if ((tex && ilo_texture_alloc_bo(tex)) ||
             (buf && ilo_buffer_alloc_bo(buf))) {
            /* states referencing the old bo must be re-emitted */
            ilo_mark_states_with_resource_dirty(ilo, res);
            will_stall = false;
         }
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here. When a region of
          * the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping, and emit pipelined copy
          * blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make bo busy (so that map() stalls as it should be) */
         if (need_flush)
            ilo_cp_flush(ilo->cp);
      }
   }

   /* formats that cannot be mapped directly need a software staging path */
   if (tex && !(usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
      /* depth/stencil formats with a separate stencil bo, or pure stencil */
      if (tex->separate_s8 || tex->bo_format == PIPE_FORMAT_S8_UINT)
         xfer->method = ILO_TRANSFER_MAP_SW_ZS;
      /* need to convert on-the-fly */
      else if (tex->bo_format != tex->base.format)
         xfer->method = ILO_TRANSFER_MAP_SW_CONVERT;
   }

   return true;
}
||
182 | |||
183 | static void |
||
184 | tex_get_box_origin(const struct ilo_texture *tex, |
||
185 | unsigned level, unsigned slice, |
||
186 | const struct pipe_box *box, |
||
187 | unsigned *mem_x, unsigned *mem_y) |
||
188 | { |
||
189 | unsigned x, y; |
||
190 | |||
191 | x = tex->slice_offsets[level][slice + box->z].x + box->x; |
||
192 | y = tex->slice_offsets[level][slice + box->z].y + box->y; |
||
193 | |||
194 | assert(x % tex->block_width == 0 && y % tex->block_height == 0); |
||
195 | |||
196 | *mem_x = x / tex->block_width * tex->bo_cpp; |
||
197 | *mem_y = y / tex->block_height; |
||
198 | } |
||
199 | |||
200 | static unsigned |
||
201 | tex_get_box_offset(const struct ilo_texture *tex, unsigned level, |
||
202 | const struct pipe_box *box) |
||
203 | { |
||
204 | unsigned mem_x, mem_y; |
||
205 | |||
206 | tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y); |
||
207 | |||
208 | return mem_y * tex->bo_stride + mem_x; |
||
209 | } |
||
210 | |||
211 | static unsigned |
||
212 | tex_get_slice_stride(const struct ilo_texture *tex, unsigned level) |
||
213 | { |
||
214 | unsigned qpitch; |
||
215 | |||
216 | /* there is no 3D array texture */ |
||
217 | assert(tex->base.array_size == 1 || tex->base.depth0 == 1); |
||
218 | |||
219 | if (tex->base.array_size == 1) { |
||
220 | /* non-array, non-3D */ |
||
221 | if (tex->base.depth0 == 1) |
||
222 | return 0; |
||
223 | |||
224 | /* only the first level has a fixed slice stride */ |
||
225 | if (level > 0) { |
||
226 | assert(!"no slice stride for 3D texture with level > 0"); |
||
227 | return 0; |
||
228 | } |
||
229 | } |
||
230 | |||
231 | qpitch = tex->slice_offsets[level][1].y - tex->slice_offsets[level][0].y; |
||
232 | assert(qpitch % tex->block_height == 0); |
||
233 | |||
234 | return (qpitch / tex->block_height) * tex->bo_stride; |
||
235 | } |
||
236 | |||
static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *      "As shown in the tiling algorithm, the new address bit[6] should be:
    *
    *         Address bit[6] <= TiledAddr bit[6] XOR
    *                           TiledAddr bit[9] XOR
    *                           TiledAddr bit[10]"
    */
   const unsigned bit9 = (addr >> 9) & 0x1;
   const unsigned bit10 = (addr >> 10) & 0x1;

   return addr ^ ((bit9 ^ bit10) << 6);
}
||
251 | |||
static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *      "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *         Address bit[6] <= TiledAddr bit[6] XOR
    *                           TiledAddr bit[9]"
    */
   const unsigned bit9 = (addr >> 9) & 0x1;

   return addr ^ (bit9 << 6);
}
||
265 | |||
static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
    * X-major tile has 8 rows and 32 OWord columns (512 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *           = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    *
    * Spelled with divisions/moduli rather than shifts/masks; the compiler
    * reduces these to the same bit operations for unsigned operands.
    */
   const unsigned tile = (mem_y / 8) * tiles_per_row + (mem_x / 512);
   const unsigned offset = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}
||
295 | |||
static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
    * Y-major tile has 32 rows and 8 OWord columns (128 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *
    * OWords in that tile are numbered in column-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *
    * Spelled with divisions/moduli rather than shifts/masks; the compiler
    * reduces these to the same bit operations for unsigned operands.
    */
   const unsigned tile = (mem_y / 32) * tiles_per_row + (mem_x / 128);
   const unsigned oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32);
   const unsigned offset = tile * 4096 + oword * 16 + (mem_x % 16);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
||
325 | |||
static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
    * W-major tile has 8 8x8-block rows and 8 8x8-block columns.  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *
    * 8x8-blocks in that tile are numbered in column-major order, starting
    * from zero.  The 8x8-block number can thus be calculated as follows:
    *
    *    blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *
    * Each 8x8-block is divided into 4 4x4-blocks, in row-major order.  Each
    * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
    * We have
    *
    *    blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *    blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *    blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    *
    * Spelled with divisions/moduli rather than shifts/masks; the compiler
    * reduces these to the same bit operations for unsigned operands.
    */
   const unsigned tile = (mem_y / 64) * tiles_per_row + (mem_x / 64);
   const unsigned blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8);
   const unsigned blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1);
   const unsigned blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1);
   const unsigned blk1 = ((mem_y % 64) & 1) * 2 + ((mem_x % 64) & 1);
   const unsigned offset =
      tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1;

   /* W-tile swizzling uses the same bit-6/bit-9 XOR as Y-tiles */
   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
||
366 | |||
static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   /* linear layout: tiles_per_row is really the bo stride in bytes and the
    * swizzle flag is ignored */
   return tiles_per_row * mem_y + mem_x;
}
||
373 | |||
/* Computes the tiled byte offset of the texel at (mem_x, mem_y); the meaning
 * of tiles_per_row and whether swizzle applies depend on the tiling, see
 * tex_tile_choose_offset_func(). */
typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
                                         unsigned tiles_per_row,
                                         bool swizzle);
||
377 | |||
378 | static tex_tile_offset_func |
||
379 | tex_tile_choose_offset_func(const struct ilo_texture *tex, |
||
380 | unsigned *tiles_per_row) |
||
381 | { |
||
382 | switch (tex->tiling) { |
||
383 | case INTEL_TILING_X: |
||
384 | *tiles_per_row = tex->bo_stride / 512; |
||
385 | return tex_tile_x_offset; |
||
386 | case INTEL_TILING_Y: |
||
387 | *tiles_per_row = tex->bo_stride / 128; |
||
388 | return tex_tile_y_offset; |
||
389 | case INTEL_TILING_NONE: |
||
390 | default: |
||
391 | /* W-tiling */ |
||
392 | if (tex->bo_format == PIPE_FORMAT_S8_UINT) { |
||
393 | *tiles_per_row = tex->bo_stride / 64; |
||
394 | return tex_tile_w_offset; |
||
395 | } |
||
396 | else { |
||
397 | *tiles_per_row = tex->bo_stride; |
||
398 | return tex_tile_none_offset; |
||
399 | } |
||
400 | } |
||
401 | } |
||
402 | |||
/*
 * Read back a depth/stencil resource into the system staging buffer,
 * detiling texel by texel.  When the texture has a separate stencil bo, the
 * depth and stencil values are interleaved back into the packed format the
 * state tracker expects; otherwise the texture is pure S8.  Both bos must
 * already be mapped (see tex_staging_sys_map_bo()).
 */
static void
tex_staging_sys_zs_read(struct ilo_context *ilo,
                        struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* per-texel addressing below assumes uncompressed formats */
   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      /* dst_cpp: packed texel size; dst_s8_pos: byte position of stencil in
       * the packed texel; src_cpp_used: depth bytes copied per texel */
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         /* depth and stencil may have different layouts: track both origins */
         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               /* copy depth bytes, then patch in the stencil byte */
               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->bo_cpp;
               s8_x++;   /* stencil is 1 byte per texel */
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }
}
||
513 | |||
/*
 * Write the system staging buffer back into a depth/stencil resource, tiling
 * texel by texel.  This is the inverse of tex_staging_sys_zs_read(): packed
 * depth+stencil texels from the staging buffer are split into the depth bo
 * and (when present) the separate stencil bo.  Both bos must already be
 * mapped (see tex_staging_sys_map_bo()).
 */
static void
tex_staging_sys_zs_write(struct ilo_context *ilo,
                         struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* per-texel addressing below assumes uncompressed formats */
   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      /* src_cpp: packed texel size; src_s8_pos: byte position of stencil in
       * the packed texel; dst_cpp_used: depth bytes written per texel */
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         /* depth and stencil may have different layouts: track both origins */
         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               /* split the packed texel: depth bytes, then the stencil byte */
               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->bo_cpp;
               s8_x++;   /* stencil is 1 byte per texel */
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }
}
||
624 | |||
/*
 * Write the system staging buffer back into the resource, converting from
 * tex->base.format to tex->bo_format on the way.  The bo must already be
 * mapped, with a linear view (see tex_staging_sys_map_bo()).
 */
static void
tex_staging_sys_convert_write(struct ilo_context *ilo,
                              struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   dst = intel_bo_get_virtual(tex->bo);
   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   /* formats match: a plain blit suffices (unexpected on this path, since
    * choose_transfer_method() only picks SW_CONVERT when they differ) */
   if (unlikely(tex->bo_format == tex->base.format)) {
      util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride,
            0, 0, 0, box->width, box->height, box->depth,
            xfer->staging_sys, xfer->base.stride, xfer->base.layer_stride,
            0, 0, 0);
      return;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      /* decompress ETC1 blocks to RGBX while copying */
      assert(tex->bo_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging_sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }
}
||
672 | |||
673 | static bool |
||
674 | tex_staging_sys_map_bo(const struct ilo_context *ilo, |
||
675 | const struct ilo_texture *tex, |
||
676 | bool for_read_back, bool linear_view) |
||
677 | { |
||
678 | const bool prefer_cpu = (ilo->dev->has_llc || for_read_back); |
||
679 | int err; |
||
680 | |||
681 | if (prefer_cpu && (tex->tiling == INTEL_TILING_NONE || !linear_view)) |
||
682 | err = intel_bo_map(tex->bo, !for_read_back); |
||
683 | else |
||
684 | err = intel_bo_map_gtt(tex->bo); |
||
685 | |||
686 | if (!tex->separate_s8) |
||
687 | return !err; |
||
688 | |||
689 | err = intel_bo_map(tex->separate_s8->bo, !for_read_back); |
||
690 | if (err) |
||
691 | intel_bo_unmap(tex->bo); |
||
692 | |||
693 | return !err; |
||
694 | } |
||
695 | |||
696 | static void |
||
697 | tex_staging_sys_unmap_bo(const struct ilo_context *ilo, |
||
698 | const struct ilo_texture *tex) |
||
699 | { |
||
700 | if (tex->separate_s8) |
||
701 | intel_bo_unmap(tex->separate_s8->bo); |
||
702 | |||
703 | intel_bo_unmap(tex->bo); |
||
704 | } |
||
705 | |||
706 | static void |
||
707 | tex_staging_sys_unmap(struct ilo_context *ilo, |
||
708 | struct ilo_texture *tex, |
||
709 | struct ilo_transfer *xfer) |
||
710 | { |
||
711 | bool success; |
||
712 | |||
713 | if (!(xfer->base.usage & PIPE_TRANSFER_WRITE)) { |
||
714 | FREE(xfer->staging_sys); |
||
715 | return; |
||
716 | } |
||
717 | |||
718 | switch (xfer->method) { |
||
719 | case ILO_TRANSFER_MAP_SW_CONVERT: |
||
720 | success = tex_staging_sys_map_bo(ilo, tex, false, true); |
||
721 | if (success) { |
||
722 | tex_staging_sys_convert_write(ilo, tex, xfer); |
||
723 | tex_staging_sys_unmap_bo(ilo, tex); |
||
724 | } |
||
725 | break; |
||
726 | case ILO_TRANSFER_MAP_SW_ZS: |
||
727 | success = tex_staging_sys_map_bo(ilo, tex, false, false); |
||
728 | if (success) { |
||
729 | tex_staging_sys_zs_write(ilo, tex, xfer); |
||
730 | tex_staging_sys_unmap_bo(ilo, tex); |
||
731 | } |
||
732 | break; |
||
733 | default: |
||
734 | assert(!"unknown mapping method"); |
||
735 | success = false; |
||
736 | break; |
||
737 | } |
||
738 | |||
739 | if (!success) |
||
740 | ilo_err("failed to map resource for moving staging data\n"); |
||
741 | |||
742 | FREE(xfer->staging_sys); |
||
743 | } |
||
744 | |||
745 | static bool |
||
746 | tex_staging_sys_map(struct ilo_context *ilo, |
||
747 | struct ilo_texture *tex, |
||
748 | struct ilo_transfer *xfer) |
||
749 | { |
||
750 | const struct pipe_box *box = &xfer->base.box; |
||
751 | const size_t stride = util_format_get_stride(tex->base.format, box->width); |
||
752 | const size_t size = |
||
753 | util_format_get_2d_size(tex->base.format, stride, box->height); |
||
754 | bool read_back = false, success; |
||
755 | |||
756 | xfer->staging_sys = MALLOC(size * box->depth); |
||
757 | if (!xfer->staging_sys) |
||
758 | return false; |
||
759 | |||
760 | xfer->base.stride = stride; |
||
761 | xfer->base.layer_stride = size; |
||
762 | xfer->ptr = xfer->staging_sys; |
||
763 | |||
764 | /* see if we need to read the resource back */ |
||
765 | if (xfer->base.usage & PIPE_TRANSFER_READ) { |
||
766 | read_back = true; |
||
767 | } |
||
768 | else if (xfer->base.usage & PIPE_TRANSFER_WRITE) { |
||
769 | const unsigned discard_flags = |
||
770 | (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE); |
||
771 | |||
772 | if (!(xfer->base.usage & discard_flags)) |
||
773 | read_back = true; |
||
774 | } |
||
775 | |||
776 | if (!read_back) |
||
777 | return true; |
||
778 | |||
779 | switch (xfer->method) { |
||
780 | case ILO_TRANSFER_MAP_SW_CONVERT: |
||
781 | assert(!"no on-the-fly format conversion for mapping"); |
||
782 | success = false; |
||
783 | break; |
||
784 | case ILO_TRANSFER_MAP_SW_ZS: |
||
785 | success = tex_staging_sys_map_bo(ilo, tex, true, false); |
||
786 | if (success) { |
||
787 | tex_staging_sys_zs_read(ilo, tex, xfer); |
||
788 | tex_staging_sys_unmap_bo(ilo, tex); |
||
789 | } |
||
790 | break; |
||
791 | default: |
||
792 | assert(!"unknown mapping method"); |
||
793 | success = false; |
||
794 | break; |
||
795 | } |
||
796 | |||
797 | return success; |
||
798 | } |
||
799 | |||
800 | static void |
||
801 | tex_direct_unmap(struct ilo_context *ilo, |
||
802 | struct ilo_texture *tex, |
||
803 | struct ilo_transfer *xfer) |
||
804 | { |
||
805 | intel_bo_unmap(tex->bo); |
||
806 | } |
||
807 | |||
808 | static bool |
||
809 | tex_direct_map(struct ilo_context *ilo, |
||
810 | struct ilo_texture *tex, |
||
811 | struct ilo_transfer *xfer) |
||
812 | { |
||
813 | if (!map_bo_for_transfer(ilo, tex->bo, xfer)) |
||
814 | return false; |
||
815 | |||
816 | /* note that stride is for a block row, not a texel row */ |
||
817 | xfer->base.stride = tex->bo_stride; |
||
818 | |||
819 | /* slice stride is not always available */ |
||
820 | if (xfer->base.box.depth > 1) |
||
821 | xfer->base.layer_stride = tex_get_slice_stride(tex, xfer->base.level); |
||
822 | else |
||
823 | xfer->base.layer_stride = 0; |
||
824 | |||
825 | xfer->ptr = intel_bo_get_virtual(tex->bo); |
||
826 | xfer->ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box); |
||
827 | |||
828 | return true; |
||
829 | } |
||
830 | |||
831 | static bool |
||
832 | tex_map(struct ilo_context *ilo, struct ilo_transfer *xfer) |
||
833 | { |
||
834 | struct ilo_texture *tex = ilo_texture(xfer->base.resource); |
||
835 | bool success; |
||
836 | |||
837 | if (!choose_transfer_method(ilo, xfer)) |
||
838 | return false; |
||
839 | |||
840 | switch (xfer->method) { |
||
841 | case ILO_TRANSFER_MAP_CPU: |
||
842 | case ILO_TRANSFER_MAP_GTT: |
||
843 | case ILO_TRANSFER_MAP_UNSYNC: |
||
844 | success = tex_direct_map(ilo, tex, xfer); |
||
845 | break; |
||
846 | case ILO_TRANSFER_MAP_SW_CONVERT: |
||
847 | case ILO_TRANSFER_MAP_SW_ZS: |
||
848 | success = tex_staging_sys_map(ilo, tex, xfer); |
||
849 | break; |
||
850 | default: |
||
851 | assert(!"unknown mapping method"); |
||
852 | success = false; |
||
853 | break; |
||
854 | } |
||
855 | |||
856 | return success; |
||
857 | } |
||
858 | |||
859 | static void |
||
860 | tex_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer) |
||
861 | { |
||
862 | struct ilo_texture *tex = ilo_texture(xfer->base.resource); |
||
863 | |||
864 | switch (xfer->method) { |
||
865 | case ILO_TRANSFER_MAP_CPU: |
||
866 | case ILO_TRANSFER_MAP_GTT: |
||
867 | case ILO_TRANSFER_MAP_UNSYNC: |
||
868 | tex_direct_unmap(ilo, tex, xfer); |
||
869 | break; |
||
870 | case ILO_TRANSFER_MAP_SW_CONVERT: |
||
871 | case ILO_TRANSFER_MAP_SW_ZS: |
||
872 | tex_staging_sys_unmap(ilo, tex, xfer); |
||
873 | break; |
||
874 | default: |
||
875 | assert(!"unknown mapping method"); |
||
876 | break; |
||
877 | } |
||
878 | } |
||
879 | |||
880 | static bool |
||
881 | buf_map(struct ilo_context *ilo, struct ilo_transfer *xfer) |
||
882 | { |
||
883 | struct ilo_buffer *buf = ilo_buffer(xfer->base.resource); |
||
884 | |||
885 | if (!choose_transfer_method(ilo, xfer)) |
||
886 | return false; |
||
887 | |||
888 | if (!map_bo_for_transfer(ilo, buf->bo, xfer)) |
||
889 | return false; |
||
890 | |||
891 | assert(xfer->base.level == 0); |
||
892 | assert(xfer->base.box.y == 0); |
||
893 | assert(xfer->base.box.z == 0); |
||
894 | assert(xfer->base.box.height == 1); |
||
895 | assert(xfer->base.box.depth == 1); |
||
896 | |||
897 | xfer->base.stride = 0; |
||
898 | xfer->base.layer_stride = 0; |
||
899 | |||
900 | xfer->ptr = intel_bo_get_virtual(buf->bo); |
||
901 | xfer->ptr += xfer->base.box.x; |
||
902 | |||
903 | return true; |
||
904 | } |
||
905 | |||
906 | static void |
||
907 | buf_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer) |
||
908 | { |
||
909 | struct ilo_buffer *buf = ilo_buffer(xfer->base.resource); |
||
910 | |||
911 | intel_bo_unmap(buf->bo); |
||
912 | } |
||
913 | |||
914 | static void |
||
915 | buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf, |
||
916 | unsigned usage, int offset, int size, const void *data) |
||
917 | { |
||
918 | bool need_flush; |
||
919 | |||
920 | /* see if we can avoid stalling */ |
||
921 | if (is_bo_busy(ilo, buf->bo, &need_flush)) { |
||
922 | bool will_stall = true; |
||
923 | |||
924 | if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { |
||
925 | /* old data not needed so discard the old bo to avoid stalling */ |
||
926 | if (ilo_buffer_alloc_bo(buf)) { |
||
927 | ilo_mark_states_with_resource_dirty(ilo, &buf->base); |
||
928 | will_stall = false; |
||
929 | } |
||
930 | } |
||
931 | else { |
||
932 | /* |
||
933 | * We could allocate a temporary bo to hold the data and emit |
||
934 | * pipelined copy blit to move them to buf->bo. But for now, do |
||
935 | * nothing. |
||
936 | */ |
||
937 | } |
||
938 | |||
939 | /* flush to make bo busy (so that pwrite() stalls as it should be) */ |
||
940 | if (will_stall && need_flush) |
||
941 | ilo_cp_flush(ilo->cp); |
||
942 | } |
||
943 | |||
944 | intel_bo_pwrite(buf->bo, offset, size, data); |
||
945 | } |
||
946 | |||
/*
 * Intentionally a no-op.  Nothing is done per-region here; any needed
 * writeback appears to be handled when the transfer is unmapped (see
 * ilo_transfer_unmap) — NOTE(review): confirm against tex_staging_sys_unmap.
 */
static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}
||
953 | |||
954 | static void |
||
955 | ilo_transfer_unmap(struct pipe_context *pipe, |
||
956 | struct pipe_transfer *transfer) |
||
957 | { |
||
958 | struct ilo_context *ilo = ilo_context(pipe); |
||
959 | struct ilo_transfer *xfer = ilo_transfer(transfer); |
||
960 | |||
961 | if (xfer->base.resource->target == PIPE_BUFFER) |
||
962 | buf_unmap(ilo, xfer); |
||
963 | else |
||
964 | tex_unmap(ilo, xfer); |
||
965 | |||
966 | pipe_resource_reference(&xfer->base.resource, NULL); |
||
967 | |||
968 | util_slab_free(&ilo->transfer_mempool, xfer); |
||
969 | } |
||
970 | |||
971 | static void * |
||
972 | ilo_transfer_map(struct pipe_context *pipe, |
||
973 | struct pipe_resource *res, |
||
974 | unsigned level, |
||
975 | unsigned usage, |
||
976 | const struct pipe_box *box, |
||
977 | struct pipe_transfer **transfer) |
||
978 | { |
||
979 | struct ilo_context *ilo = ilo_context(pipe); |
||
980 | struct ilo_transfer *xfer; |
||
981 | bool success; |
||
982 | |||
983 | xfer = util_slab_alloc(&ilo->transfer_mempool); |
||
984 | if (!xfer) { |
||
985 | *transfer = NULL; |
||
986 | return NULL; |
||
987 | } |
||
988 | |||
989 | xfer->base.resource = NULL; |
||
990 | pipe_resource_reference(&xfer->base.resource, res); |
||
991 | xfer->base.level = level; |
||
992 | xfer->base.usage = usage; |
||
993 | xfer->base.box = *box; |
||
994 | |||
995 | if (res->target == PIPE_BUFFER) |
||
996 | success = buf_map(ilo, xfer); |
||
997 | else |
||
998 | success = tex_map(ilo, xfer); |
||
999 | |||
1000 | if (!success) { |
||
1001 | pipe_resource_reference(&xfer->base.resource, NULL); |
||
1002 | FREE(xfer); |
||
1003 | *transfer = NULL; |
||
1004 | return NULL; |
||
1005 | } |
||
1006 | |||
1007 | *transfer = &xfer->base; |
||
1008 | |||
1009 | return xfer->ptr; |
||
1010 | } |
||
1011 | |||
1012 | static void |
||
1013 | ilo_transfer_inline_write(struct pipe_context *pipe, |
||
1014 | struct pipe_resource *res, |
||
1015 | unsigned level, |
||
1016 | unsigned usage, |
||
1017 | const struct pipe_box *box, |
||
1018 | const void *data, |
||
1019 | unsigned stride, |
||
1020 | unsigned layer_stride) |
||
1021 | { |
||
1022 | if (likely(res->target == PIPE_BUFFER) && |
||
1023 | !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { |
||
1024 | /* they should specify just an offset and a size */ |
||
1025 | assert(level == 0); |
||
1026 | assert(box->y == 0); |
||
1027 | assert(box->z == 0); |
||
1028 | assert(box->height == 1); |
||
1029 | assert(box->depth == 1); |
||
1030 | |||
1031 | buf_pwrite(ilo_context(pipe), ilo_buffer(res), |
||
1032 | usage, box->x, box->width, data); |
||
1033 | } |
||
1034 | else { |
||
1035 | u_default_transfer_inline_write(pipe, res, |
||
1036 | level, usage, box, data, stride, layer_stride); |
||
1037 | } |
||
1038 | } |
||
1039 | |||
1040 | /** |
||
1041 | * Initialize transfer-related functions. |
||
1042 | */ |
||
1043 | void |
||
1044 | ilo_init_transfer_functions(struct ilo_context *ilo) |
||
1045 | { |
||
1046 | ilo->base.transfer_map = ilo_transfer_map; |
||
1047 | ilo->base.transfer_flush_region = ilo_transfer_flush_region; |
||
1048 | ilo->base.transfer_unmap = ilo_transfer_unmap; |
||
1049 | ilo->base.transfer_inline_write = ilo_transfer_inline_write; |
||
1050 | }>>>>>>>>>>>>>><>><>><>><>><>><>><>><>><>><>><>><>><>=>=> |