Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | |
2 | * |
||
3 | * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | |||
29 | #include "main/macros.h" |
||
30 | #include "main/mtypes.h" |
||
31 | #include "main/pbo.h" |
||
32 | #include "main/texobj.h" |
||
33 | #include "main/texstore.h" |
||
34 | #include "main/texcompress.h" |
||
35 | #include "main/enums.h" |
||
36 | |||
37 | |||
38 | #include "intel_batchbuffer.h" |
||
39 | #include "intel_tex.h" |
||
40 | #include "intel_mipmap_tree.h" |
||
41 | #include "intel_blit.h" |
||
42 | |||
43 | |||
44 | |||
45 | |||
46 | intel_blit_texsubimage(struct gl_context * ctx, |
||
47 | struct gl_texture_image *texImage, |
||
48 | GLint xoffset, GLint yoffset, |
||
49 | GLint width, GLint height, |
||
50 | GLenum format, GLenum type, const void *pixels, |
||
51 | const struct gl_pixelstore_attrib *packing) |
||
52 | { |
||
53 | struct brw_context *brw = brw_context(ctx); |
||
54 | struct intel_texture_image *intelImage = intel_texture_image(texImage); |
||
55 | |||
56 | |||
57 | * currently busy. |
||
58 | */ |
||
59 | if (!intelImage->mt) |
||
60 | return false; |
||
61 | |||
62 | |||
63 | if (intelImage->mt->region->tiling == I915_TILING_Y) |
||
64 | return false; |
||
65 | |||
66 | |||
67 | return false; |
||
68 | |||
69 | |||
70 | * this because of all the overhead involved. |
||
71 | */ |
||
72 | if (brw->gen >= 6) |
||
73 | return false; |
||
74 | |||
75 | |||
76 | return false; |
||
77 | |||
78 | |||
79 | __FUNCTION__, |
||
80 | _mesa_lookup_enum_by_nr(texImage->TexObject->Target), |
||
81 | texImage->Level, xoffset, yoffset, width, height); |
||
82 | |||
83 | |||
84 | format, type, pixels, packing, |
||
85 | "glTexSubImage"); |
||
86 | if (!pixels) |
||
87 | return false; |
||
88 | |||
89 | |||
90 | intel_miptree_create(brw, GL_TEXTURE_2D, texImage->TexFormat, |
||
91 | 0, 0, |
||
92 | width, height, 1, |
||
93 | false, 0, INTEL_MIPTREE_TILING_NONE); |
||
94 | if (!temp_mt) |
||
95 | goto err; |
||
96 | |||
97 | |||
98 | if (!dst) |
||
99 | goto err; |
||
100 | |||
101 | |||
102 | texImage->TexFormat, |
||
103 | temp_mt->region->pitch, |
||
104 | &dst, |
||
105 | width, height, 1, |
||
106 | format, type, pixels, packing)) { |
||
107 | _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); |
||
108 | } |
||
109 | |||
110 | |||
111 | |||
112 | |||
113 | |||
114 | |||
115 | temp_mt, 0, 0, |
||
116 | 0, 0, false, |
||
117 | intelImage->mt, texImage->Level, texImage->Face, |
||
118 | xoffset, yoffset, false, |
||
119 | width, height, GL_COPY); |
||
120 | assert(ret); |
||
121 | |||
122 | |||
123 | _mesa_unmap_teximage_pbo(ctx, packing); |
||
124 | |||
125 | |||
126 | |||
127 | |||
128 | _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); |
||
129 | intel_miptree_release(&temp_mt); |
||
130 | _mesa_unmap_teximage_pbo(ctx, packing); |
||
131 | return false; |
||
132 | } |
||
133 | |||
134 | |||
135 | * \brief A fast path for glTexImage and glTexSubImage. |
||
136 | * |
||
137 | * \param for_glTexImage Was this called from glTexImage or glTexSubImage? |
||
138 | * |
||
139 | * This fast path is taken when the hardware natively supports the texture |
||
140 | * format (such as GL_BGRA) and when the texture memory is X-tiled. It uploads |
||
141 | * the texture data by mapping the texture memory without a GTT fence, thus |
||
142 | * acquiring a tiled view of the memory, and then memcpy'ing successive |
||
143 | * subspans within each tile. |
||
144 | * |
||
145 | * This is a performance win over the conventional texture upload path because |
||
146 | * it avoids the performance penalty of writing through the write-combine |
||
147 | * buffer. In the conventional texture upload path, |
||
148 | * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT |
||
149 | * fence, thus acquiring a linear view of the memory, then each row in the |
||
150 | * image is memcpy'd. In this fast path, we replace each row's memcpy with |
||
151 | * a sequence of memcpy's over each bit6 swizzle span in the row. |
||
152 | * |
||
153 | * This fast path's use case is Google Chrome's paint rectangles. Chrome (as |
||
154 | * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures. |
||
155 | * Each page's content is initially uploaded with glTexImage2D and damaged |
||
156 | * regions are updated with glTexSubImage2D. On some workloads, the |
||
157 | * performance gain of this fast path on Sandybridge is over 5x. |
||
158 | */ |
||
159 | bool |
||
160 | intel_texsubimage_tiled_memcpy(struct gl_context * ctx, |
||
161 | GLuint dims, |
||
162 | struct gl_texture_image *texImage, |
||
163 | GLint xoffset, GLint yoffset, GLint zoffset, |
||
164 | GLsizei width, GLsizei height, GLsizei depth, |
||
165 | GLenum format, GLenum type, |
||
166 | const GLvoid *pixels, |
||
167 | const struct gl_pixelstore_attrib *packing, |
||
168 | bool for_glTexImage) |
||
169 | { |
||
170 | struct brw_context *brw = brw_context(ctx); |
||
171 | struct intel_texture_image *image = intel_texture_image(texImage); |
||
172 | |||
173 | |||
174 | drm_intel_bo *bo; |
||
175 | |||
176 | |||
177 | |||
178 | |||
179 | * a 2D BGRA texture. It could be generalized to support more types by |
||
180 | * varying the arithmetic loop below. |
||
181 | */ |
||
182 | if (!brw->has_llc || |
||
183 | format != GL_BGRA || |
||
184 | type != GL_UNSIGNED_BYTE || |
||
185 | texImage->TexFormat != MESA_FORMAT_ARGB8888 || |
||
186 | texImage->TexObject->Target != GL_TEXTURE_2D || |
||
187 | texImage->Level != 0 || |
||
188 | pixels == NULL || |
||
189 | _mesa_is_bufferobj(packing->BufferObj) || |
||
190 | packing->Alignment > 4 || |
||
191 | packing->SkipPixels > 0 || |
||
192 | packing->SkipRows > 0 || |
||
193 | (packing->RowLength != 0 && packing->RowLength != width) || |
||
194 | packing->SwapBytes || |
||
195 | packing->LsbFirst || |
||
196 | packing->Invert) |
||
197 | return false; |
||
198 | |||
199 | |||
200 | ctx->Driver.AllocTextureImageBuffer(ctx, texImage); |
||
201 | |||
202 | |||
203 | image->mt->region->tiling != I915_TILING_X) { |
||
204 | /* The algorithm below is written only for X-tiled memory. */ |
||
205 | return false; |
||
206 | } |
||
207 | |||
208 | |||
209 | * any pending fast color clears before we start. |
||
210 | */ |
||
211 | intel_miptree_resolve_color(brw, image->mt); |
||
212 | |||
213 | |||
214 | |||
215 | |||
216 | perf_debug("Flushing before mapping a referenced bo.\n"); |
||
217 | intel_batchbuffer_flush(brw); |
||
218 | } |
||
219 | |||
220 | |||
221 | if (drm_intel_bo_busy(bo)) { |
||
222 | perf_debug("Mapping a busy BO, causing a stall on the GPU.\n"); |
||
223 | } |
||
224 | } |
||
225 | |||
226 | |||
227 | if (error || bo->virtual == NULL) { |
||
228 | DBG("%s: failed to map bo\n", __FUNCTION__); |
||
229 | return false; |
||
230 | } |
||
231 | |||
232 | |||
233 | * the function. |
||
234 | */ |
||
235 | DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d)\n", |
||
236 | __FUNCTION__, texImage->Level, xoffset, yoffset, width, height); |
||
237 | |||
238 | |||
239 | * others are in units of bytes, and others (such as height) are unitless. |
||
240 | * Each variable name is suffixed with its units. |
||
241 | */ |
||
242 | |||
243 | |||
244 | const uint32_t y_max_pixels = yoffset + height; |
||
245 | |||
246 | |||
247 | |||
248 | |||
249 | const uint32_t tile_width_pixels = 128; |
||
250 | |||
251 | |||
252 | |||
253 | |||
254 | const uint32_t swizzle_width_pixels = 16; |
||
255 | |||
256 | |||
257 | const uint32_t width_tiles = stride_bytes / tile_width_bytes; |
||
258 | |||
259 | |||
260 | const uint32_t y_offset_bytes = (y_pixels / tile_height) * width_tiles * tile_size_bytes |
||
261 | + (y_pixels % tile_height) * tile_width_bytes; |
||
262 | |||
263 | |||
264 | const uint32_t x_offset_bytes = (x_pixels / tile_width_pixels) * tile_size_bytes |
||
265 | + (x_pixels % tile_width_pixels) * cpp; |
||
266 | |||
267 | |||
268 | if (brw->has_swizzling) { |
||
269 | #if 0 |
||
270 | /* Clear, unoptimized version. */ |
||
271 | bool bit6 = (offset_bytes >> 6) & 1; |
||
272 | bool bit9 = (offset_bytes >> 9) & 1; |
||
273 | bool bit10 = (offset_bytes >> 10) & 1; |
||
274 | |||
275 | |||
276 | offset_bytes ^= (1 << 6); |
||
277 | #else |
||
278 | /* Optimized, obfuscated version. */ |
||
279 | offset_bytes ^= ((offset_bytes >> 3) ^ (offset_bytes >> 4)) |
||
280 | & (1 << 6); |
||
281 | #endif |
||
282 | } |
||
283 | |||
284 | |||
285 | const uint32_t memcpy_bound_pixels = MIN2(x_max_pixels, swizzle_bound_pixels); |
||
286 | const uint32_t copy_size = cpp * (memcpy_bound_pixels - x_pixels); |
||
287 | |||
288 | |||
289 | pixels += copy_size; |
||
290 | x_pixels -= (x_pixels % swizzle_width_pixels); |
||
291 | } |
||
292 | } |
||
293 | |||
294 | |||
295 | return true; |
||
296 | } |
||
297 | |||
298 | |||
299 | intelTexSubImage(struct gl_context * ctx, |
||
300 | GLuint dims, |
||
301 | struct gl_texture_image *texImage, |
||
302 | GLint xoffset, GLint yoffset, GLint zoffset, |
||
303 | GLsizei width, GLsizei height, GLsizei depth, |
||
304 | GLenum format, GLenum type, |
||
305 | const GLvoid * pixels, |
||
306 | const struct gl_pixelstore_attrib *packing) |
||
307 | { |
||
308 | bool ok; |
||
309 | |||
310 | |||
311 | xoffset, yoffset, zoffset, |
||
312 | width, height, depth, |
||
313 | format, type, pixels, packing, |
||
314 | false /*for_glTexImage*/); |
||
315 | if (ok) |
||
316 | return; |
||
317 | |||
318 | |||
319 | if (dims != 2 || !intel_blit_texsubimage(ctx, texImage, |
||
320 | xoffset, yoffset, |
||
321 | width, height, |
||
322 | format, type, pixels, packing)) { |
||
323 | _mesa_store_texsubimage(ctx, dims, texImage, |
||
324 | xoffset, yoffset, zoffset, |
||
325 | width, height, depth, |
||
326 | format, type, pixels, packing); |
||
327 | } |
||
328 | } |
||
329 | |||
330 | |||
331 | intelInitTextureSubImageFuncs(struct dd_function_table *functions) |
||
332 | { |
||
333 | functions->TexSubImage = intelTexSubImage; |
||
334 | } |
||
335 |