Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2003 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | #include "main/glheader.h" |
||
29 | #include "main/enums.h" |
||
30 | #include "main/mtypes.h" |
||
31 | #include "main/macros.h" |
||
32 | #include "main/fbobject.h" |
||
33 | #include "main/image.h" |
||
34 | #include "main/bufferobj.h" |
||
35 | #include "main/readpix.h" |
||
36 | #include "main/state.h" |
||
37 | #include "main/glformats.h" |
||
38 | #include "drivers/common/meta.h" |
||
39 | |||
40 | #include "brw_context.h" |
||
41 | #include "intel_screen.h" |
||
42 | #include "intel_batchbuffer.h" |
||
43 | #include "intel_blit.h" |
||
44 | #include "intel_buffers.h" |
||
45 | #include "intel_fbo.h" |
||
46 | #include "intel_mipmap_tree.h" |
||
47 | #include "intel_pixel.h" |
||
48 | #include "intel_buffer_objects.h" |
||
49 | #include "intel_tiled_memcpy.h" |
||
50 | |||
51 | #define FILE_DEBUG_FLAG DEBUG_PIXEL |
||
52 | |||
53 | /** |
||
54 | * \brief A fast path for glReadPixels |
||
55 | * |
||
56 | * This fast path is taken when the source format is BGRA, RGBA, |
||
57 | * A or L and when the texture memory is X- or Y-tiled. It downloads |
||
58 | * the source data by directly mapping the memory without a GTT fence. |
||
59 | * This then needs to be de-tiled on the CPU before presenting the data to |
||
60 | * the user in the linear fasion. |
||
61 | * |
||
62 | * This is a performance win over the conventional texture download path. |
||
63 | * In the conventional texture download path, the texture is either mapped |
||
64 | * through the GTT or copied to a linear buffer with the blitter before |
||
65 | * handing off to a software path. This allows us to avoid round-tripping |
||
66 | * through the GPU (in the case where we would be blitting) and do only a |
||
67 | * single copy operation. |
||
68 | */ |
||
69 | static bool |
||
70 | intel_readpixels_tiled_memcpy(struct gl_context * ctx, |
||
71 | GLint xoffset, GLint yoffset, |
||
72 | GLsizei width, GLsizei height, |
||
73 | GLenum format, GLenum type, |
||
74 | GLvoid * pixels, |
||
75 | const struct gl_pixelstore_attrib *pack) |
||
76 | { |
||
77 | struct brw_context *brw = brw_context(ctx); |
||
78 | struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; |
||
79 | |||
80 | /* This path supports reading from color buffers only */ |
||
81 | if (rb == NULL) |
||
82 | return false; |
||
83 | |||
84 | struct intel_renderbuffer *irb = intel_renderbuffer(rb); |
||
85 | int dst_pitch; |
||
86 | |||
87 | /* The miptree's buffer. */ |
||
88 | drm_intel_bo *bo; |
||
89 | |||
90 | int error = 0; |
||
91 | |||
92 | uint32_t cpp; |
||
93 | mem_copy_fn mem_copy = NULL; |
||
94 | |||
95 | /* This fastpath is restricted to specific renderbuffer types: |
||
96 | * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support |
||
97 | * more types. |
||
98 | */ |
||
99 | if (!brw->has_llc || |
||
100 | !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || |
||
101 | pixels == NULL || |
||
102 | _mesa_is_bufferobj(pack->BufferObj) || |
||
103 | pack->Alignment > 4 || |
||
104 | pack->SkipPixels > 0 || |
||
105 | pack->SkipRows > 0 || |
||
106 | (pack->RowLength != 0 && pack->RowLength != width) || |
||
107 | pack->SwapBytes || |
||
108 | pack->LsbFirst || |
||
109 | pack->Invert) |
||
110 | return false; |
||
111 | |||
112 | /* This renderbuffer can come from a texture. In this case, we impose |
||
113 | * some of the same restrictions we have for textures and adjust for |
||
114 | * miplevels. |
||
115 | */ |
||
116 | if (rb->TexImage) { |
||
117 | if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D && |
||
118 | rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE) |
||
119 | return false; |
||
120 | |||
121 | int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel; |
||
122 | |||
123 | /* Adjust x and y offset based on miplevel */ |
||
124 | xoffset += irb->mt->level[level].level_x; |
||
125 | yoffset += irb->mt->level[level].level_y; |
||
126 | } |
||
127 | |||
128 | /* It is possible that the renderbuffer (or underlying texture) is |
||
129 | * multisampled. Since ReadPixels from a multisampled buffer requires a |
||
130 | * multisample resolve, we can't handle this here |
||
131 | */ |
||
132 | if (rb->NumSamples > 1) |
||
133 | return false; |
||
134 | |||
135 | /* We can't handle copying from RGBX or BGRX because the tiled_memcpy |
||
136 | * function doesn't set the last channel to 1. |
||
137 | */ |
||
138 | if (rb->Format == MESA_FORMAT_B8G8R8X8_UNORM || |
||
139 | rb->Format == MESA_FORMAT_R8G8B8X8_UNORM) |
||
140 | return false; |
||
141 | |||
142 | if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp, |
||
143 | INTEL_DOWNLOAD)) |
||
144 | return false; |
||
145 | |||
146 | if (!irb->mt || |
||
147 | (irb->mt->tiling != I915_TILING_X && |
||
148 | irb->mt->tiling != I915_TILING_Y)) { |
||
149 | /* The algorithm is written only for X- or Y-tiled memory. */ |
||
150 | return false; |
||
151 | } |
||
152 | |||
153 | /* Since we are going to read raw data to the miptree, we need to resolve |
||
154 | * any pending fast color clears before we start. |
||
155 | */ |
||
156 | intel_miptree_resolve_color(brw, irb->mt); |
||
157 | |||
158 | bo = irb->mt->bo; |
||
159 | |||
160 | if (drm_intel_bo_references(brw->batch.bo, bo)) { |
||
161 | perf_debug("Flushing before mapping a referenced bo.\n"); |
||
162 | intel_batchbuffer_flush(brw); |
||
163 | } |
||
164 | |||
165 | error = brw_bo_map(brw, bo, false /* write enable */, "miptree"); |
||
166 | if (error) { |
||
167 | DBG("%s: failed to map bo\n", __func__); |
||
168 | return false; |
||
169 | } |
||
170 | |||
171 | dst_pitch = _mesa_image_row_stride(pack, width, format, type); |
||
172 | |||
173 | /* For a window-system renderbuffer, the buffer is actually flipped |
||
174 | * vertically, so we need to handle that. Since the detiling function |
||
175 | * can only really work in the forwards direction, we have to be a |
||
176 | * little creative. First, we compute the Y-offset of the first row of |
||
177 | * the renderbuffer (in renderbuffer coordinates). We then match that |
||
178 | * with the last row of the client's data. Finally, we give |
||
179 | * tiled_to_linear a negative pitch so that it walks through the |
||
180 | * client's data backwards as it walks through the renderbufer forwards. |
||
181 | */ |
||
182 | if (rb->Name == 0) { |
||
183 | yoffset = rb->Height - yoffset - height; |
||
184 | pixels += (ptrdiff_t) (height - 1) * dst_pitch; |
||
185 | dst_pitch = -dst_pitch; |
||
186 | } |
||
187 | |||
188 | /* We postponed printing this message until having committed to executing |
||
189 | * the function. |
||
190 | */ |
||
191 | DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " |
||
192 | "mesa_format=0x%x tiling=%d " |
||
193 | "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n", |
||
194 | __func__, xoffset, yoffset, width, height, |
||
195 | format, type, rb->Format, irb->mt->tiling, |
||
196 | pack->Alignment, pack->RowLength, pack->SkipPixels, |
||
197 | pack->SkipRows); |
||
198 | |||
199 | tiled_to_linear( |
||
200 | xoffset * cpp, (xoffset + width) * cpp, |
||
201 | yoffset, yoffset + height, |
||
202 | pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp, |
||
203 | bo->virtual, |
||
204 | dst_pitch, irb->mt->pitch, |
||
205 | brw->has_swizzling, |
||
206 | irb->mt->tiling, |
||
207 | mem_copy |
||
208 | ); |
||
209 | |||
210 | drm_intel_bo_unmap(bo); |
||
211 | return true; |
||
212 | } |
||
213 | |||
214 | void |
||
215 | intelReadPixels(struct gl_context * ctx, |
||
216 | GLint x, GLint y, GLsizei width, GLsizei height, |
||
217 | GLenum format, GLenum type, |
||
218 | const struct gl_pixelstore_attrib *pack, GLvoid * pixels) |
||
219 | { |
||
220 | bool ok; |
||
221 | |||
222 | struct brw_context *brw = brw_context(ctx); |
||
223 | bool dirty; |
||
224 | |||
225 | DBG("%s\n", __func__); |
||
226 | |||
227 | if (_mesa_is_bufferobj(pack->BufferObj)) { |
||
228 | if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1, |
||
229 | format, type, pixels, pack)) { |
||
230 | /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by |
||
231 | * binding the user-provided BO as a fake framebuffer and rendering |
||
232 | * to it. This breaks the invariant of the GL that nothing is able |
||
233 | * to render to a BO, causing nondeterministic corruption issues |
||
234 | * because the render cache is not coherent with a number of other |
||
235 | * caches that the BO could potentially be bound to afterwards. |
||
236 | * |
||
237 | * This could be solved in the same way that we guarantee texture |
||
238 | * coherency after a texture is attached to a framebuffer and |
||
239 | * rendered to, but that would involve checking *all* BOs bound to |
||
240 | * the pipeline for the case we need to emit a cache flush due to |
||
241 | * previous rendering to any of them -- Including vertex, index, |
||
242 | * uniform, atomic counter, shader image, transform feedback, |
||
243 | * indirect draw buffers, etc. |
||
244 | * |
||
245 | * That would increase the per-draw call overhead even though it's |
||
246 | * very unlikely that any of the BOs bound to the pipeline has been |
||
247 | * rendered to via a PBO at any point, so it seems better to just |
||
248 | * flush here unconditionally. |
||
249 | */ |
||
250 | intel_batchbuffer_emit_mi_flush(brw); |
||
251 | return; |
||
252 | } |
||
253 | |||
254 | perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); |
||
255 | } |
||
256 | |||
257 | ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height, |
||
258 | format, type, pixels, pack); |
||
259 | if(ok) |
||
260 | return; |
||
261 | |||
262 | /* glReadPixels() wont dirty the front buffer, so reset the dirty |
||
263 | * flag after calling intel_prepare_render(). */ |
||
264 | dirty = brw->front_buffer_dirty; |
||
265 | intel_prepare_render(brw); |
||
266 | brw->front_buffer_dirty = dirty; |
||
267 | |||
268 | /* Update Mesa state before calling _mesa_readpixels(). |
||
269 | * XXX this may not be needed since ReadPixels no longer uses the |
||
270 | * span code. |
||
271 | */ |
||
272 | |||
273 | if (ctx->NewState) |
||
274 | _mesa_update_state(ctx); |
||
275 | |||
276 | _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); |
||
277 | |||
278 | /* There's an intel_prepare_render() call in intelSpanRenderStart(). */ |
||
279 | brw->front_buffer_dirty = dirty; |
||
280 | } |