Rev 4315 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4315 | Serge | 1 | /* |
2 | * Copyright © 2006,2008 Intel Corporation |
||
3 | * Copyright © 2007 Red Hat, Inc. |
||
4 | * |
||
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
6 | * copy of this software and associated documentation files (the "Software"), |
||
7 | * to deal in the Software without restriction, including without limitation |
||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
9 | * and/or sell copies of the Software, and to permit persons to whom the |
||
10 | * Software is furnished to do so, subject to the following conditions: |
||
11 | * |
||
12 | * The above copyright notice and this permission notice (including the next |
||
13 | * paragraph) shall be included in all copies or substantial portions of the |
||
14 | * Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
22 | * SOFTWARE. |
||
23 | * |
||
24 | * Authors: |
||
25 | * Wang Zhenyu |
||
26 | * Eric Anholt |
||
27 | * Carl Worth |
||
28 | * Keith Packard |
||
29 | * |
||
30 | */ |
||
31 | |||
32 | #ifdef HAVE_CONFIG_H |
||
33 | #include "config.h" |
||
34 | #endif |
||
35 | |||
36 | #include |
||
37 | #include |
||
38 | #include |
||
39 | |||
40 | #include |
||
41 | //#include "xf86.h" |
||
42 | #include "intel.h" |
||
43 | #include "i830_reg.h" |
||
44 | #include "i965_reg.h" |
||
45 | |||
46 | /* bring in brw structs */ |
||
47 | #include "brw_defines.h" |
||
48 | #include "brw_structs.h" |
||
49 | |||
/* Fallback diagnostics are emitted directly through printf. */
#define intel_debug_fallback printf

/* Debug traces are emitted directly through printf as well. */
#define DBG printf
4315 | Serge | 53 | |
54 | // refer vol2, 3d rasterization 3.8.1 |
||
55 | |||
56 | /* defined in brw_defines.h */ |
||
57 | static const struct blendinfo { |
||
58 | Bool dst_alpha; |
||
59 | Bool src_alpha; |
||
60 | uint32_t src_blend; |
||
61 | uint32_t dst_blend; |
||
62 | } i965_blend_op[] = { |
||
63 | /* Clear */ |
||
64 | {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO}, |
||
65 | /* Src */ |
||
66 | {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, |
||
67 | /* Dst */ |
||
68 | {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, |
||
69 | /* Over */ |
||
70 | {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, |
||
71 | /* OverReverse */ |
||
72 | {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, |
||
73 | /* In */ |
||
74 | {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, |
||
75 | /* InReverse */ |
||
76 | {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, |
||
77 | /* Out */ |
||
78 | {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, |
||
79 | /* OutReverse */ |
||
80 | {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, |
||
81 | /* Atop */ |
||
82 | {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, |
||
83 | /* AtopReverse */ |
||
84 | {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, |
||
85 | /* Xor */ |
||
86 | {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, |
||
87 | /* Add */ |
||
88 | {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, |
||
89 | }; |
||
90 | |||
/**
 * Number of blend-factor values covered by per-factor lookup tables:
 * one more than BRW_BLENDFACTOR_INV_DST_ALPHA, which the original author
 * identifies as the highest-valued BLENDFACTOR used in i965_blend_op.
 *
 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)
||
99 | |||
100 | /* FIXME: surface format defined in brw_defines.h, shared Sampling engine |
||
101 | * 1.7.2 |
||
102 | */ |
||
103 | static const struct formatinfo { |
||
104 | int fmt; |
||
105 | uint32_t card_fmt; |
||
106 | } i965_tex_formats[] = { |
||
107 | {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM}, |
||
108 | {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM}, |
||
109 | {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM}, |
||
110 | {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM}, |
||
111 | {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM}, |
||
112 | {PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM}, |
||
113 | {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM}, |
||
114 | {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM}, |
||
115 | #if XORG_VERSION_CURRENT >= 10699900 |
||
116 | {PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM}, |
||
117 | {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, |
||
118 | {PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM}, |
||
119 | {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, |
||
120 | #endif |
||
121 | {PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM}, |
||
122 | }; |
||
123 | |||
124 | static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format, |
||
125 | uint32_t * sblend, uint32_t * dblend) |
||
126 | { |
||
127 | |||
128 | *sblend = i965_blend_op[op].src_blend; |
||
129 | *dblend = i965_blend_op[op].dst_blend; |
||
130 | |||
131 | /* If there's no dst alpha channel, adjust the blend op so that we'll treat |
||
132 | * it as always 1. |
||
133 | */ |
||
134 | if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) { |
||
135 | if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) |
||
136 | *sblend = BRW_BLENDFACTOR_ONE; |
||
137 | else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) |
||
138 | *sblend = BRW_BLENDFACTOR_ZERO; |
||
139 | } |
||
140 | |||
141 | /* If the source alpha is being used, then we should only be in a case where |
||
142 | * the source blend factor is 0, and the source blend value is the mask |
||
143 | * channels multiplied by the source picture's alpha. |
||
144 | */ |
||
145 | if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) |
||
146 | && i965_blend_op[op].src_alpha) { |
||
147 | if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) { |
||
148 | *dblend = BRW_BLENDFACTOR_SRC_COLOR; |
||
149 | } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) { |
||
150 | *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR; |
||
151 | } |
||
152 | } |
||
153 | |||
154 | } |
||
155 | |||
156 | static uint32_t i965_get_dest_format(PicturePtr dest_picture) |
||
157 | { |
||
158 | switch (dest_picture->format) { |
||
159 | case PICT_a8r8g8b8: |
||
160 | case PICT_x8r8g8b8: |
||
161 | return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; |
||
162 | case PICT_a8b8g8r8: |
||
163 | case PICT_x8b8g8r8: |
||
164 | return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; |
||
165 | #if XORG_VERSION_CURRENT >= 10699900 |
||
166 | case PICT_a2r10g10b10: |
||
167 | case PICT_x2r10g10b10: |
||
168 | return BRW_SURFACEFORMAT_B10G10R10A2_UNORM; |
||
169 | #endif |
||
170 | case PICT_r5g6b5: |
||
171 | return BRW_SURFACEFORMAT_B5G6R5_UNORM; |
||
172 | case PICT_x1r5g5b5: |
||
173 | case PICT_a1r5g5b5: |
||
174 | return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; |
||
175 | case PICT_a8: |
||
176 | return BRW_SURFACEFORMAT_A8_UNORM; |
||
177 | case PICT_a4r4g4b4: |
||
178 | case PICT_x4r4g4b4: |
||
179 | return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; |
||
180 | default: |
||
181 | return -1; |
||
182 | } |
||
183 | } |
||
184 | |||
185 | Bool |
||
186 | i965_check_composite(int op, |
||
187 | PicturePtr source_picture, |
||
188 | PicturePtr mask_picture, |
||
189 | PicturePtr dest_picture, |
||
190 | int width, int height) |
||
191 | { |
||
192 | /* Check for unsupported compositing operations. */ |
||
193 | if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) { |
||
194 | intel_debug_fallback("Unsupported Composite op 0x%x\n", op); |
||
195 | return FALSE; |
||
196 | } |
||
197 | |||
198 | if (mask_picture && mask_picture->componentAlpha && |
||
199 | PICT_FORMAT_RGB(mask_picture->format)) { |
||
200 | /* Check if it's component alpha that relies on a source alpha and on |
||
201 | * the source value. We can only get one of those into the single |
||
202 | * source value that we get to blend with. |
||
203 | */ |
||
204 | if (i965_blend_op[op].src_alpha && |
||
205 | (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { |
||
206 | intel_debug_fallback("Component alpha not supported " |
||
207 | "with source alpha and source " |
||
208 | "value blending.\n"); |
||
209 | return FALSE; |
||
210 | } |
||
211 | } |
||
212 | |||
213 | if (i965_get_dest_format(dest_picture) == -1) { |
||
214 | intel_debug_fallback("Usupported Color buffer format 0x%x\n", |
||
215 | (int)dest_picture->format); |
||
216 | return FALSE; |
||
217 | } |
||
218 | |||
219 | return TRUE; |
||
220 | } |
||
221 | |||
222 | Bool |
||
223 | i965_check_composite_texture(ScreenPtr screen, PicturePtr picture) |
||
224 | { |
||
225 | if (picture->repeatType > RepeatReflect) { |
||
226 | intel_debug_fallback("extended repeat (%d) not supported\n", |
||
227 | picture->repeatType); |
||
228 | return FALSE; |
||
229 | } |
||
230 | |||
231 | if (picture->filter != PictFilterNearest && |
||
232 | picture->filter != PictFilterBilinear) { |
||
233 | intel_debug_fallback("Unsupported filter 0x%x\n", picture->filter); |
||
234 | return FALSE; |
||
235 | } |
||
236 | |||
237 | if (picture->pDrawable) { |
||
238 | int w, h, i; |
||
239 | |||
240 | w = picture->pDrawable->width; |
||
241 | h = picture->pDrawable->height; |
||
242 | if ((w > 8192) || (h > 8192)) { |
||
243 | intel_debug_fallback( "Picture w/h too large (%dx%d)\n",w, h); |
||
244 | return FALSE; |
||
245 | } |
||
246 | |||
247 | for (i = 0; |
||
248 | i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); |
||
249 | i++) { |
||
250 | if (i965_tex_formats[i].fmt == picture->format) |
||
251 | break; |
||
252 | } |
||
253 | if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])) |
||
254 | { |
||
255 | intel_debug_fallback("Unsupported picture format " |
||
256 | "0x%x\n", (int)picture->format); |
||
257 | return FALSE; |
||
258 | } |
||
259 | |||
260 | return TRUE; |
||
261 | } |
||
262 | |||
263 | return FALSE; |
||
264 | } |
||
265 | |||
266 | |||
/*
 * Convert a register count into the value stored in a grf_reg_count
 * field: the number of 16-register blocks, rounded up, minus one.
 * The argument is parenthesized so that expressions such as `1 << 4`
 * are evaluated as a whole.
 */
#define BRW_GRF_BLOCKS(nreg)	(((nreg) + 15) / 16 - 1)
||
268 | |||
/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 *
 * Each stage gets an entry size and an entry count; stages given 0/0
 * receive no URB space.
 */
#define URB_CS_ENTRY_SIZE 0
#define URB_CS_ENTRIES 0

#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
#define URB_VS_ENTRIES 8 // we need at least 8 entries

#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0

#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 1
||
286 | |||
/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */

/* Register count and thread limit programmed into the SF unit state. */
#define SF_KERNEL_NUM_GRF 16
#define SF_MAX_THREADS 2

/* Pre-assembled SF kernel; each row is four 32-bit words. */
static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
};

/* Pre-assembled SF kernel built from the "mask" variant of the program. */
static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"
};
||
302 | |||
/* ps kernels */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 48

/*
 * Each pixel shader below is assembled by concatenating pre-built
 * fragments in the order listed; every fragment contributes rows of four
 * 32-bit words.  Going by the file names, the fragments set up x/y, compute
 * source (and mask) coordinates in affine or projective form, sample the
 * source (and mask), combine them, and write the result.
 */

/* No mask, affine source coordinates. */
static const uint32_t ps_kernel_nomask_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

/* No mask, projective source coordinates. */
static const uint32_t ps_kernel_nomask_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

/* Component-alpha mask, affine coordinates. */
static const uint32_t ps_kernel_maskca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

/* Component-alpha mask, projective coordinates. */
static const uint32_t ps_kernel_maskca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

/* Component-alpha mask using only the source's alpha, affine coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

/* Component-alpha mask using only the source's alpha, projective coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

/* Mask without component alpha, affine coordinates. */
static const uint32_t ps_kernel_masknoca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

/* Mask without component alpha, projective coordinates. */
static const uint32_t ps_kernel_masknoca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};
||
380 | |||
/* new programs for Ironlake */

/* Gen5 builds of the two SF kernels. */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "exa_sf.g4b.gen5"
};

static const uint32_t sf_kernel_mask_static_gen5[][4] = {
#include "exa_sf_mask.g4b.gen5"
};

/*
 * Gen5 builds of the pixel shaders.  Each array lists the same sequence of
 * fragments as its gen4 counterpart, taken from the ".g4b.gen5" files.
 */

/* No mask, affine source coordinates. */
static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* No mask, projective source coordinates. */
static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* Component-alpha mask, affine coordinates. */
static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* Component-alpha mask, projective coordinates. */
static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* Component-alpha mask using only the source's alpha, affine coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* Component-alpha mask using only the source's alpha, projective coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* Mask without component alpha, affine coordinates. */
static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* Mask without component alpha, projective coordinates. */
static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};
||
463 | |||
/* programs for GEN6
 *
 * Unlike the gen4/gen5 shaders, these do not start with an "exa_wm_xy"
 * fragment; they begin directly with the source coordinate fragment.
 */

/* No mask, affine source coordinates. */
static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

/* No mask, projective source coordinates. */
static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

/* Component-alpha mask, affine coordinates. */
static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g6b"
};
||
485 | |||
486 | static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = { |
||
487 | #include "exa_wm_src_projective.g6b" |
||
488 | #include "exa_wm_src_sample_argb.g6b" |
||
489 | #include "exa_wm_mask_projective.g6b" |
||
490 | #include "exa_wm_mask_sample_argb.g6b" |
||
491 | #include "exa_wm_ca.g4b.gen5" |
||
492 | #include "exa_wm_write.g6b" |
||
493 | }; |
||
494 | |||
/* Gen6: component-alpha mask using only the source's alpha, affine coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

/* Gen6: component-alpha mask using only the source's alpha, projective coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

/* Gen6: mask without component alpha, affine coordinates. */
static const uint32_t ps_kernel_masknoca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};

/* Gen6: mask without component alpha, projective coordinates. */
static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};
||
530 | |||
/* programs for GEN7
 *
 * Coordinate, sampling and write fragments come from ".g7b" files; the
 * combine fragments (ca / ca_srcalpha / noca) are taken from the ".g6b"
 * builds.
 */

/* No mask, affine source coordinates. */
static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_write.g7b"
};

/* No mask, projective source coordinates. */
static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_write.g7b"
};

/* Component-alpha mask, affine coordinates. */
static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g7b"
};
||
552 | |||
553 | static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = { |
||
554 | #include "exa_wm_src_projective.g7b" |
||
555 | #include "exa_wm_src_sample_argb.g7b" |
||
556 | #include "exa_wm_mask_projective.g7b" |
||
557 | #include "exa_wm_mask_sample_argb.g7b" |
||
558 | #include "exa_wm_ca.g4b.gen5" |
||
559 | #include "exa_wm_write.g7b" |
||
560 | }; |
||
561 | |||
/* Gen7: component-alpha mask using only the source's alpha, affine coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_a.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g7b"
};

/* Gen7: component-alpha mask using only the source's alpha, projective coordinates. */
static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_a.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g7b"
};

/* Gen7: mask without component alpha, affine coordinates. */
static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};

/* Gen7: mask without component alpha, projective coordinates. */
static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};
||
597 | |||
598 | |||
/*
 * Sampler filter selector.  Valid values start at 0 so they can index
 * arrays dimensioned by FILTER_COUNT; SS_INVALID_FILTER marks "no filter".
 */
typedef enum {
	SS_INVALID_FILTER = -1,

	SS_FILTER_NEAREST = 0,
	SS_FILTER_BILINEAR,

	FILTER_COUNT,		/* number of valid filters; keep last */
} sampler_state_filter_t;
||
605 | |||
/*
 * Sampler extend (repeat) selector.  Valid values start at 0 so they can
 * index arrays dimensioned by EXTEND_COUNT; SS_INVALID_EXTEND marks
 * "no extend mode".
 */
typedef enum {
	SS_INVALID_EXTEND = -1,

	SS_EXTEND_NONE = 0,
	SS_EXTEND_REPEAT,
	SS_EXTEND_PAD,
	SS_EXTEND_REFLECT,

	EXTEND_COUNT,		/* number of valid extend modes; keep last */
} sampler_state_extend_t;
||
614 | |||
/*
 * Identifies one pixel-shader (WM) kernel variant.  The values index the
 * per-generation wm_kernels_* tables and arrays sized by KERNEL_COUNT.
 */
typedef enum {
	/* no mask */
	WM_KERNEL_NOMASK_AFFINE = 0,
	WM_KERNEL_NOMASK_PROJECTIVE,

	/* component-alpha mask */
	WM_KERNEL_MASKCA_AFFINE,
	WM_KERNEL_MASKCA_PROJECTIVE,

	/* component-alpha mask, source alpha only */
	WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,

	/* mask without component alpha */
	WM_KERNEL_MASKNOCA_AFFINE,
	WM_KERNEL_MASKNOCA_PROJECTIVE,

	KERNEL_COUNT		/* number of kernels; keep last */
} wm_kernel_t;
||
626 | |||
/*
 * Builds one designated initializer for a wm_kernel_info table: slot
 * `kernel_enum` gets the address and byte size of the array `kernel` and
 * the has_mask flag `masked`.  `kernel` must be an actual array so that
 * sizeof yields the size of the whole program.
 */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
/* Describes one pre-assembled WM kernel. */
struct wm_kernel_info {
	const void *data;	/* start of the kernel binary */
	unsigned int size;	/* size of the binary in bytes */
	Bool has_mask;		/* TRUE for kernels that take a mask */
};
||
634 | |||
/* Gen4 WM kernels, indexed by wm_kernel_t. */
static const struct wm_kernel_info wm_kernels_gen4[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static, TRUE),
};
||
653 | |||
/* Gen5 (Ironlake) WM kernels, indexed by wm_kernel_t. */
static const struct wm_kernel_info wm_kernels_gen5[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen5, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen5, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen5, TRUE),
};
||
672 | |||
/* Gen6 WM kernels, indexed by wm_kernel_t. */
static const struct wm_kernel_info wm_kernels_gen6[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen6, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen6, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen6, TRUE),
};
||
691 | |||
/* Gen7 WM kernels, indexed by wm_kernel_t. */
static const struct wm_kernel_info wm_kernels_gen7[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen7, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen7, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen7, TRUE),
};
||
710 | |||
711 | #undef KERNEL |
||
712 | |||
/*
 * A brw_cc_unit_state followed by padding that brings the total size to
 * 64 bytes, so that consecutive array elements sit 64 bytes apart.  The
 * pad expression requires sizeof(struct brw_cc_unit_state) to be below 64.
 */
typedef struct _brw_cc_unit_state_padded {
	struct brw_cc_unit_state state;
	char pad[64 - sizeof(struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;
||
717 | |||
/* Larger of two values.  Both arguments may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
/* Space reserved per surface-state entry: the larger of the gen4 and gen7
 * surface-state structures, passed through ALIGN(..., 32).  ALIGN is
 * defined elsewhere; presumably it rounds up to a multiple of 32 --
 * TODO confirm.
 */
#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32)
||
722 | |||
/*
 * One padded colour-calculator state per (source factor, destination
 * factor) pair, covering BRW_BLENDFACTOR_COUNT values on each axis.
 */
struct gen4_cc_unit_state {
	/* Index by [src_blend][dst_blend] */
	brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT];
};
||
727 | |||
/* Parameters describing one composite operation. */
typedef struct gen4_composite_op {
	int op;					/* composite operator */
	sampler_state_filter_t src_filter;	/* filter for the source */
	sampler_state_filter_t mask_filter;	/* filter for the mask */
	sampler_state_extend_t src_extend;	/* extend mode for the source */
	sampler_state_extend_t mask_extend;	/* extend mode for the mask */
	Bool is_affine;				/* presumably TRUE when the affine kernels apply -- confirm at use sites */
	wm_kernel_t wm_kernel;			/* selected WM kernel */
	int vertex_id;				/* NOTE(review): meaning not visible in this part of the file */
} gen4_composite_op;
||
738 | |||
/** Private data for gen4 render accel implementation. */
struct gen4_render_state {
	/* Buffer objects holding unit state, one per named unit. */
	drm_intel_bo *vs_state_bo;
	drm_intel_bo *sf_state_bo;
	drm_intel_bo *sf_mask_state_bo;
	drm_intel_bo *cc_state_bo;
	/* One WM state buffer per kernel and per combination of two
	 * filter/extend pairs (presumably source first, then mask --
	 * confirm at the use sites).
	 */
	drm_intel_bo *wm_state_bo[KERNEL_COUNT]
	    [FILTER_COUNT] [EXTEND_COUNT]
	    [FILTER_COUNT] [EXTEND_COUNT];
	/* One buffer per WM kernel. */
	drm_intel_bo *wm_kernel_bo[KERNEL_COUNT];

	drm_intel_bo *cc_vp_bo;
	drm_intel_bo *gen6_blend_bo;
	drm_intel_bo *gen6_depth_stencil_bo;
	/* One sampler state buffer per combination of two filter/extend
	 * pairs, using the same index order as the trailing dimensions of
	 * wm_state_bo.
	 */
	drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT]
	    [EXTEND_COUNT]
	    [FILTER_COUNT]
	    [EXTEND_COUNT];
	/* The composite operation currently described by this state. */
	gen4_composite_op composite_op;
};
||
759 | |||
/* Forward declarations for gen6 helpers defined later in the file. */
static void gen6_emit_composite_state(struct intel_screen_private *intel);
/* NOTE(review): the empty parentheses declare no prototype, so calls are
 * not checked against the definition's parameters -- confirm the
 * definition's parameter list and spell it out here.
 */
static void gen6_render_state_init();
||
762 | |||
763 | /** |
||
764 | * Sets up the SF state pointing at an SF kernel. |
||
765 | * |
||
766 | * The SF kernel does coord interp: for each attribute, |
||
767 | * calculate dA/dx and dA/dy. Hand these interpolation coefficients |
||
768 | * back to SF which then hands pixels off to WM. |
||
769 | */ |
||
770 | static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel, |
||
771 | drm_intel_bo * kernel_bo) |
||
772 | { |
||
773 | struct brw_sf_unit_state *sf_state; |
||
774 | drm_intel_bo *sf_state_bo; |
||
775 | int ret; |
||
776 | |||
777 | sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state", |
||
778 | sizeof(*sf_state), 4096); |
||
779 | assert(sf_state_bo); |
||
780 | |||
781 | ret = drm_intel_bo_map(sf_state_bo, TRUE); |
||
782 | assert(ret == 0); |
||
783 | |||
784 | sf_state = memset(sf_state_bo->virtual, 0, sizeof(*sf_state)); |
||
785 | sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); |
||
786 | sf_state->thread0.kernel_start_pointer = |
||
787 | intel_emit_reloc(sf_state_bo, |
||
788 | offsetof(struct brw_sf_unit_state, thread0), |
||
789 | kernel_bo, sf_state->thread0.grf_reg_count << 1, |
||
790 | I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; |
||
791 | sf_state->sf1.single_program_flow = 1; |
||
792 | sf_state->sf1.binding_table_entry_count = 0; |
||
793 | sf_state->sf1.thread_priority = 0; |
||
794 | sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ |
||
795 | sf_state->sf1.illegal_op_exception_enable = 1; |
||
796 | sf_state->sf1.mask_stack_exception_enable = 1; |
||
797 | sf_state->sf1.sw_exception_enable = 1; |
||
798 | sf_state->thread2.per_thread_scratch_space = 0; |
||
799 | /* scratch space is not used in our kernel */ |
||
800 | sf_state->thread2.scratch_space_base_pointer = 0; |
||
801 | sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ |
||
802 | sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ |
||
803 | sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ |
||
804 | /* don't smash vertex header, read start from dw8 */ |
||
805 | sf_state->thread3.urb_entry_read_offset = 1; |
||
806 | sf_state->thread3.dispatch_grf_start_reg = 3; |
||
807 | sf_state->thread4.max_threads = SF_MAX_THREADS - 1; |
||
808 | sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; |
||
809 | sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; |
||
810 | sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ |
||
811 | sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; |
||
812 | sf_state->sf6.scissor = 0; |
||
813 | sf_state->sf7.trifan_pv = 2; |
||
814 | sf_state->sf6.dest_org_vbias = 0x8; |
||
815 | sf_state->sf6.dest_org_hbias = 0x8; |
||
816 | |||
817 | drm_intel_bo_unmap(sf_state_bo); |
||
818 | |||
819 | return sf_state_bo; |
||
820 | (void)ret; |
||
821 | } |
||
822 | |||
823 | static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel) |
||
824 | { |
||
825 | struct brw_sampler_legacy_border_color sampler_border_color; |
||
826 | |||
827 | /* Set up the sampler border color (always transparent black) */ |
||
828 | memset(&sampler_border_color, 0, sizeof(sampler_border_color)); |
||
829 | sampler_border_color.color[0] = 0; /* R */ |
||
830 | sampler_border_color.color[1] = 0; /* G */ |
||
831 | sampler_border_color.color[2] = 0; /* B */ |
||
832 | sampler_border_color.color[3] = 0; /* A */ |
||
833 | |||
834 | return intel_bo_alloc_for_data(intel, |
||
835 | &sampler_border_color, |
||
836 | sizeof(sampler_border_color), |
||
837 | "gen4 render sampler border color"); |
||
838 | } |
||
839 | |||
840 | static void |
||
841 | gen4_sampler_state_init(drm_intel_bo * sampler_state_bo, |
||
842 | struct brw_sampler_state *sampler_state, |
||
843 | sampler_state_filter_t filter, |
||
844 | sampler_state_extend_t extend, |
||
845 | drm_intel_bo * border_color_bo) |
||
846 | { |
||
847 | uint32_t sampler_state_offset; |
||
848 | |||
849 | sampler_state_offset = (char *)sampler_state - |
||
850 | (char *)sampler_state_bo->virtual; |
||
851 | |||
852 | /* PS kernel use this sampler */ |
||
853 | memset(sampler_state, 0, sizeof(*sampler_state)); |
||
854 | |||
855 | sampler_state->ss0.lod_preclamp = 1; /* GL mode */ |
||
856 | |||
857 | /* We use the legacy mode to get the semantics specified by |
||
858 | * the Render extension. */ |
||
859 | sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; |
||
860 | |||
861 | switch (filter) { |
||
862 | default: |
||
863 | case SS_FILTER_NEAREST: |
||
864 | sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; |
||
865 | sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; |
||
866 | break; |
||
867 | case SS_FILTER_BILINEAR: |
||
868 | sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; |
||
869 | sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; |
||
870 | break; |
||
871 | } |
||
872 | |||
873 | switch (extend) { |
||
874 | default: |
||
875 | case SS_EXTEND_NONE: |
||
876 | sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; |
||
877 | sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; |
||
878 | sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; |
||
879 | break; |
||
880 | case SS_EXTEND_REPEAT: |
||
881 | sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; |
||
882 | sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; |
||
883 | sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; |
||
884 | break; |
||
885 | case SS_EXTEND_PAD: |
||
886 | sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; |
||
887 | sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; |
||
888 | sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; |
||
889 | break; |
||
890 | case SS_EXTEND_REFLECT: |
||
891 | sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; |
||
892 | sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; |
||
893 | sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; |
||
894 | break; |
||
895 | } |
||
896 | |||
897 | sampler_state->ss2.border_color_pointer = |
||
898 | intel_emit_reloc(sampler_state_bo, sampler_state_offset + |
||
899 | offsetof(struct brw_sampler_state, ss2), |
||
900 | border_color_bo, 0, |
||
901 | I915_GEM_DOMAIN_SAMPLER, 0) >> 5; |
||
902 | |||
903 | sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ |
||
904 | } |
||
905 | |||
906 | static void |
||
907 | gen7_sampler_state_init(drm_intel_bo * sampler_state_bo, |
||
908 | struct gen7_sampler_state *sampler_state, |
||
909 | sampler_state_filter_t filter, |
||
910 | sampler_state_extend_t extend, |
||
911 | drm_intel_bo * border_color_bo) |
||
912 | { |
||
913 | uint32_t sampler_state_offset; |
||
914 | |||
915 | sampler_state_offset = (char *)sampler_state - |
||
916 | (char *)sampler_state_bo->virtual; |
||
917 | |||
918 | /* PS kernel use this sampler */ |
||
919 | memset(sampler_state, 0, sizeof(*sampler_state)); |
||
920 | |||
921 | sampler_state->ss0.lod_preclamp = 1; /* GL mode */ |
||
922 | |||
923 | /* We use the legacy mode to get the semantics specified by |
||
924 | * the Render extension. */ |
||
925 | sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; |
||
926 | |||
927 | switch (filter) { |
||
928 | default: |
||
929 | case SS_FILTER_NEAREST: |
||
930 | sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; |
||
931 | sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; |
||
932 | break; |
||
933 | case SS_FILTER_BILINEAR: |
||
934 | sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; |
||
935 | sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; |
||
936 | break; |
||
937 | } |
||
938 | |||
939 | switch (extend) { |
||
940 | default: |
||
941 | case SS_EXTEND_NONE: |
||
942 | sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; |
||
943 | sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; |
||
944 | sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; |
||
945 | break; |
||
946 | case SS_EXTEND_REPEAT: |
||
947 | sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; |
||
948 | sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; |
||
949 | sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; |
||
950 | break; |
||
951 | case SS_EXTEND_PAD: |
||
952 | sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; |
||
953 | sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; |
||
954 | sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; |
||
955 | break; |
||
956 | case SS_EXTEND_REFLECT: |
||
957 | sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; |
||
958 | sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; |
||
959 | sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; |
||
960 | break; |
||
961 | } |
||
962 | |||
963 | sampler_state->ss2.default_color_pointer = |
||
964 | intel_emit_reloc(sampler_state_bo, sampler_state_offset + |
||
965 | offsetof(struct gen7_sampler_state, ss2), |
||
966 | border_color_bo, 0, |
||
967 | I915_GEM_DOMAIN_SAMPLER, 0) >> 5; |
||
968 | |||
969 | sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ |
||
970 | } |
||
971 | |||
972 | |||
973 | |||
974 | static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel, |
||
975 | sampler_state_filter_t src_filter, |
||
976 | sampler_state_extend_t src_extend, |
||
977 | sampler_state_filter_t mask_filter, |
||
978 | sampler_state_extend_t mask_extend, |
||
979 | drm_intel_bo * border_color_bo) |
||
980 | { |
||
981 | drm_intel_bo *sampler_state_bo; |
||
982 | struct brw_sampler_state *sampler_state; |
||
983 | int ret; |
||
984 | |||
985 | sampler_state_bo = |
||
986 | drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state", |
||
987 | sizeof(struct brw_sampler_state) * 2, 4096); |
||
988 | assert(sampler_state_bo); |
||
989 | |||
990 | ret = drm_intel_bo_map(sampler_state_bo, TRUE); |
||
991 | assert(ret == 0); |
||
992 | |||
993 | sampler_state = sampler_state_bo->virtual; |
||
994 | |||
995 | gen4_sampler_state_init(sampler_state_bo, |
||
996 | &sampler_state[0], |
||
997 | src_filter, src_extend, border_color_bo); |
||
998 | gen4_sampler_state_init(sampler_state_bo, |
||
999 | &sampler_state[1], |
||
1000 | mask_filter, mask_extend, border_color_bo); |
||
1001 | |||
1002 | drm_intel_bo_unmap(sampler_state_bo); |
||
1003 | |||
1004 | return sampler_state_bo; |
||
1005 | (void)ret; |
||
1006 | } |
||
1007 | |||
1008 | static drm_intel_bo * |
||
1009 | gen7_create_sampler_state(intel_screen_private *intel, |
||
1010 | sampler_state_filter_t src_filter, |
||
1011 | sampler_state_extend_t src_extend, |
||
1012 | sampler_state_filter_t mask_filter, |
||
1013 | sampler_state_extend_t mask_extend, |
||
1014 | drm_intel_bo * border_color_bo) |
||
1015 | { |
||
1016 | drm_intel_bo *sampler_state_bo; |
||
1017 | struct gen7_sampler_state *sampler_state; |
||
1018 | int ret; |
||
1019 | |||
1020 | sampler_state_bo = |
||
1021 | drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state", |
||
1022 | sizeof(struct gen7_sampler_state) * 2, 4096); |
||
1023 | assert(sampler_state_bo); |
||
1024 | |||
1025 | ret = drm_intel_bo_map(sampler_state_bo, TRUE); |
||
1026 | assert(ret == 0); |
||
1027 | |||
1028 | sampler_state = sampler_state_bo->virtual; |
||
1029 | |||
1030 | gen7_sampler_state_init(sampler_state_bo, |
||
1031 | &sampler_state[0], |
||
1032 | src_filter, src_extend, border_color_bo); |
||
1033 | gen7_sampler_state_init(sampler_state_bo, |
||
1034 | &sampler_state[1], |
||
1035 | mask_filter, mask_extend, border_color_bo); |
||
1036 | |||
1037 | drm_intel_bo_unmap(sampler_state_bo); |
||
1038 | |||
1039 | return sampler_state_bo; |
||
1040 | (void)ret; |
||
1041 | } |
||
1042 | |||
1043 | static inline drm_intel_bo * |
||
1044 | i965_create_sampler_state(intel_screen_private *intel, |
||
1045 | sampler_state_filter_t src_filter, |
||
1046 | sampler_state_extend_t src_extend, |
||
1047 | sampler_state_filter_t mask_filter, |
||
1048 | sampler_state_extend_t mask_extend, |
||
1049 | drm_intel_bo * border_color_bo) |
||
1050 | { |
||
1051 | if (INTEL_INFO(intel)->gen < 070) |
||
1052 | return gen4_create_sampler_state(intel, src_filter, src_extend, |
||
1053 | mask_filter, mask_extend, |
||
1054 | border_color_bo); |
||
1055 | return gen7_create_sampler_state(intel, src_filter, src_extend, |
||
1056 | mask_filter, mask_extend, |
||
1057 | border_color_bo); |
||
1058 | } |
||
1059 | |||
1060 | |||
1061 | static void |
||
1062 | cc_state_init(drm_intel_bo * cc_state_bo, |
||
1063 | uint32_t cc_state_offset, |
||
1064 | int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo) |
||
1065 | { |
||
1066 | struct brw_cc_unit_state *cc_state; |
||
1067 | |||
1068 | cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual + |
||
1069 | cc_state_offset); |
||
1070 | |||
1071 | memset(cc_state, 0, sizeof(*cc_state)); |
||
1072 | cc_state->cc0.stencil_enable = 0; /* disable stencil */ |
||
1073 | cc_state->cc2.depth_test = 0; /* disable depth test */ |
||
1074 | cc_state->cc2.logicop_enable = 0; /* disable logic op */ |
||
1075 | cc_state->cc3.ia_blend_enable = 0; /* blend alpha same as colors */ |
||
1076 | cc_state->cc3.blend_enable = 1; /* enable color blend */ |
||
1077 | cc_state->cc3.alpha_test = 0; /* disable alpha test */ |
||
1078 | |||
1079 | cc_state->cc4.cc_viewport_state_offset = |
||
1080 | intel_emit_reloc(cc_state_bo, cc_state_offset + |
||
1081 | offsetof(struct brw_cc_unit_state, cc4), |
||
1082 | cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; |
||
1083 | |||
1084 | cc_state->cc5.dither_enable = 0; /* disable dither */ |
||
1085 | cc_state->cc5.logicop_func = 0xc; /* COPY */ |
||
1086 | cc_state->cc5.statistics_enable = 1; |
||
1087 | cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; |
||
1088 | |||
1089 | /* Fill in alpha blend factors same as color, for the future. */ |
||
1090 | cc_state->cc5.ia_src_blend_factor = src_blend; |
||
1091 | cc_state->cc5.ia_dest_blend_factor = dst_blend; |
||
1092 | |||
1093 | cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; |
||
1094 | cc_state->cc6.clamp_post_alpha_blend = 1; |
||
1095 | cc_state->cc6.clamp_pre_alpha_blend = 1; |
||
1096 | cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ |
||
1097 | |||
1098 | cc_state->cc6.src_blend_factor = src_blend; |
||
1099 | cc_state->cc6.dest_blend_factor = dst_blend; |
||
1100 | } |
||
1101 | |||
1102 | static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel, |
||
1103 | Bool has_mask, |
||
1104 | drm_intel_bo * kernel_bo, |
||
1105 | drm_intel_bo * sampler_bo) |
||
1106 | { |
||
1107 | struct brw_wm_unit_state *state; |
||
1108 | drm_intel_bo *wm_state_bo; |
||
1109 | int ret; |
||
1110 | |||
1111 | wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state", |
||
1112 | sizeof(*state), 4096); |
||
1113 | assert(wm_state_bo); |
||
1114 | |||
1115 | ret = drm_intel_bo_map(wm_state_bo, TRUE); |
||
1116 | assert(ret == 0); |
||
1117 | |||
1118 | state = memset(wm_state_bo->virtual, 0, sizeof(*state)); |
||
1119 | state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); |
||
1120 | state->thread0.kernel_start_pointer = |
||
1121 | intel_emit_reloc(wm_state_bo, |
||
1122 | offsetof(struct brw_wm_unit_state, thread0), |
||
1123 | kernel_bo, state->thread0.grf_reg_count << 1, |
||
1124 | I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; |
||
1125 | |||
1126 | state->thread1.single_program_flow = 0; |
||
1127 | |||
1128 | /* scratch space is not used in our kernel */ |
||
1129 | state->thread2.scratch_space_base_pointer = 0; |
||
1130 | state->thread2.per_thread_scratch_space = 0; |
||
1131 | |||
1132 | state->thread3.const_urb_entry_read_length = 0; |
||
1133 | state->thread3.const_urb_entry_read_offset = 0; |
||
1134 | |||
1135 | state->thread3.urb_entry_read_offset = 0; |
||
1136 | /* wm kernel use urb from 3, see wm_program in compiler module */ |
||
1137 | state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ |
||
1138 | |||
1139 | if (IS_GEN5(intel)) |
||
1140 | state->wm4.sampler_count = 0; /* hardware requirement */ |
||
1141 | else |
||
1142 | state->wm4.sampler_count = 1; /* 1-4 samplers used */ |
||
1143 | |||
1144 | state->wm4.sampler_state_pointer = |
||
1145 | intel_emit_reloc(wm_state_bo, |
||
1146 | offsetof(struct brw_wm_unit_state, wm4), |
||
1147 | sampler_bo, |
||
1148 | state->wm4.sampler_count << 2, |
||
1149 | I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; |
||
1150 | state->wm5.max_threads = PS_MAX_THREADS - 1; |
||
1151 | state->wm5.transposed_urb_read = 0; |
||
1152 | state->wm5.thread_dispatch_enable = 1; |
||
1153 | /* just use 16-pixel dispatch (4 subspans), don't need to change kernel |
||
1154 | * start point |
||
1155 | */ |
||
1156 | state->wm5.enable_16_pix = 1; |
||
1157 | state->wm5.enable_8_pix = 0; |
||
1158 | state->wm5.early_depth_test = 1; |
||
1159 | |||
1160 | /* Each pair of attributes (src/mask coords) is two URB entries */ |
||
1161 | if (has_mask) { |
||
1162 | state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ |
||
1163 | state->thread3.urb_entry_read_length = 4; |
||
1164 | } else { |
||
1165 | state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ |
||
1166 | state->thread3.urb_entry_read_length = 2; |
||
1167 | } |
||
1168 | |||
1169 | /* binding table entry count is only used for prefetching, and it has to |
||
1170 | * be set 0 for Ironlake |
||
1171 | */ |
||
1172 | if (IS_GEN5(intel)) |
||
1173 | state->thread1.binding_table_entry_count = 0; |
||
1174 | |||
1175 | drm_intel_bo_unmap(wm_state_bo); |
||
1176 | |||
1177 | return wm_state_bo; |
||
1178 | (void)ret; |
||
1179 | } |
||
1180 | |||
1181 | static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel) |
||
1182 | { |
||
1183 | drm_intel_bo *bo; |
||
1184 | struct brw_cc_viewport vp; |
||
1185 | int ret; |
||
1186 | |||
1187 | vp.min_depth = -1.e35; |
||
1188 | vp.max_depth = 1.e35; |
||
1189 | |||
1190 | bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state", |
||
1191 | sizeof(vp), 4096); |
||
1192 | assert(bo); |
||
1193 | |||
1194 | ret = drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp); |
||
1195 | assert(ret == 0); |
||
1196 | |||
1197 | return bo; |
||
1198 | (void)ret; |
||
1199 | } |
||
1200 | |||
1201 | static drm_intel_bo *gen4_create_vs_unit_state(intel_screen_private *intel) |
||
1202 | { |
||
1203 | struct brw_vs_unit_state vs_state; |
||
1204 | memset(&vs_state, 0, sizeof(vs_state)); |
||
1205 | |||
1206 | /* Set up the vertex shader to be disabled (passthrough) */ |
||
1207 | if (IS_GEN5(intel)) |
||
1208 | vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */ |
||
1209 | else |
||
1210 | vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; |
||
1211 | vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; |
||
1212 | vs_state.vs6.vs_enable = 0; |
||
1213 | vs_state.vs6.vert_cache_disable = 1; |
||
1214 | |||
1215 | return intel_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state), |
||
1216 | "gen4 render VS state"); |
||
1217 | } |
||
1218 | |||
1219 | /** |
||
1220 | * Set up all combinations of cc state: each blendfactor for source and |
||
1221 | * dest. |
||
1222 | */ |
||
1223 | static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel) |
||
1224 | { |
||
1225 | drm_intel_bo *cc_state_bo, *cc_vp_bo; |
||
1226 | int i, j, ret; |
||
1227 | |||
1228 | cc_vp_bo = gen4_create_cc_viewport(intel); |
||
1229 | |||
1230 | cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state", |
||
1231 | sizeof(struct gen4_cc_unit_state), |
||
1232 | 4096); |
||
1233 | assert(cc_state_bo); |
||
1234 | |||
1235 | ret = drm_intel_bo_map(cc_state_bo, TRUE); |
||
1236 | assert(ret == 0); |
||
1237 | |||
1238 | for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { |
||
1239 | for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { |
||
1240 | cc_state_init(cc_state_bo, |
||
1241 | offsetof(struct gen4_cc_unit_state, |
||
1242 | cc_state[i][j].state), |
||
1243 | i, j, cc_vp_bo); |
||
1244 | } |
||
1245 | } |
||
1246 | drm_intel_bo_unmap(cc_state_bo); |
||
1247 | |||
1248 | drm_intel_bo_unreference(cc_vp_bo); |
||
1249 | |||
1250 | return cc_state_bo; |
||
1251 | (void)ret; |
||
1252 | } |
||
1253 | |||
1254 | static uint32_t i965_get_card_format(PicturePtr picture) |
||
1255 | { |
||
1256 | int i; |
||
1257 | |||
1258 | for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); |
||
1259 | i++) { |
||
1260 | if (i965_tex_formats[i].fmt == picture->format) |
||
1261 | break; |
||
1262 | } |
||
1263 | assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])); |
||
1264 | |||
1265 | return i965_tex_formats[i].card_fmt; |
||
1266 | } |
||
1267 | |||
1268 | static sampler_state_filter_t sampler_state_filter_from_picture(int filter) |
||
1269 | { |
||
1270 | switch (filter) { |
||
1271 | case PictFilterNearest: |
||
1272 | return SS_FILTER_NEAREST; |
||
1273 | case PictFilterBilinear: |
||
1274 | return SS_FILTER_BILINEAR; |
||
1275 | default: |
||
1276 | return SS_INVALID_FILTER; |
||
1277 | } |
||
1278 | } |
||
1279 | |||
1280 | static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type) |
||
1281 | { |
||
1282 | switch (repeat_type) { |
||
1283 | case RepeatNone: |
||
1284 | return SS_EXTEND_NONE; |
||
1285 | case RepeatNormal: |
||
1286 | return SS_EXTEND_REPEAT; |
||
1287 | case RepeatPad: |
||
1288 | return SS_EXTEND_PAD; |
||
1289 | case RepeatReflect: |
||
1290 | return SS_EXTEND_REFLECT; |
||
1291 | default: |
||
1292 | return SS_INVALID_EXTEND; |
||
1293 | } |
||
1294 | } |
||
1295 | |||
1296 | /** |
||
1297 | * Sets up the common fields for a surface state buffer for the given |
||
1298 | * picture in the given surface state buffer. |
||
1299 | */ |
||
1300 | static int |
||
1301 | gen4_set_picture_surface_state(intel_screen_private *intel, |
||
1302 | PicturePtr picture, PixmapPtr pixmap, |
||
1303 | Bool is_dst) |
||
1304 | { |
||
1305 | struct intel_pixmap *priv = pixmap->private; |
||
1306 | struct brw_surface_state *ss; |
||
1307 | uint32_t write_domain, read_domains; |
||
1308 | int offset; |
||
1309 | |||
1310 | if (is_dst) { |
||
1311 | write_domain = I915_GEM_DOMAIN_RENDER; |
||
1312 | read_domains = I915_GEM_DOMAIN_RENDER; |
||
1313 | } else { |
||
1314 | write_domain = 0; |
||
1315 | read_domains = I915_GEM_DOMAIN_SAMPLER; |
||
1316 | } |
||
1317 | intel_batch_mark_pixmap_domains(intel, priv, |
||
1318 | read_domains, write_domain); |
||
1319 | ss = (struct brw_surface_state *) |
||
1320 | (intel->surface_data + intel->surface_used); |
||
1321 | |||
1322 | memset(ss, 0, sizeof(*ss)); |
||
1323 | ss->ss0.surface_type = BRW_SURFACE_2D; |
||
1324 | if (is_dst) |
||
1325 | ss->ss0.surface_format = i965_get_dest_format(picture); |
||
1326 | else |
||
1327 | ss->ss0.surface_format = i965_get_card_format(picture); |
||
1328 | |||
1329 | ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; |
||
1330 | ss->ss0.color_blend = 1; |
||
1331 | ss->ss1.base_addr = priv->bo->offset; |
||
1332 | |||
1333 | ss->ss2.height = pixmap->drawable.height - 1; |
||
1334 | ss->ss2.width = pixmap->drawable.width - 1; |
||
1335 | ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; |
||
1336 | ss->ss3.tile_walk = 0; /* Tiled X */ |
||
1337 | ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; |
||
1338 | |||
1339 | dri_bo_emit_reloc(intel->surface_bo, |
||
1340 | read_domains, write_domain, |
||
1341 | 0, |
||
1342 | intel->surface_used + |
||
1343 | offsetof(struct brw_surface_state, ss1), |
||
1344 | priv->bo); |
||
1345 | |||
1346 | offset = intel->surface_used; |
||
1347 | intel->surface_used += SURFACE_STATE_PADDED_SIZE; |
||
1348 | |||
1349 | return offset; |
||
1350 | } |
||
1351 | |||
1352 | static int |
||
1353 | gen7_set_picture_surface_state(intel_screen_private *intel, |
||
1354 | PicturePtr picture, PixmapPtr pixmap, |
||
1355 | Bool is_dst) |
||
1356 | { |
||
1357 | struct intel_pixmap *priv = pixmap->private; |
||
1358 | struct gen7_surface_state *ss; |
||
1359 | uint32_t write_domain, read_domains; |
||
1360 | int offset; |
||
1361 | |||
1362 | if (is_dst) { |
||
1363 | write_domain = I915_GEM_DOMAIN_RENDER; |
||
1364 | read_domains = I915_GEM_DOMAIN_RENDER; |
||
1365 | } else { |
||
1366 | write_domain = 0; |
||
1367 | read_domains = I915_GEM_DOMAIN_SAMPLER; |
||
1368 | } |
||
1369 | intel_batch_mark_pixmap_domains(intel, priv, |
||
1370 | read_domains, write_domain); |
||
1371 | ss = (struct gen7_surface_state *) |
||
1372 | (intel->surface_data + intel->surface_used); |
||
1373 | |||
1374 | memset(ss, 0, sizeof(*ss)); |
||
1375 | ss->ss0.surface_type = BRW_SURFACE_2D; |
||
1376 | if (is_dst) |
||
1377 | ss->ss0.surface_format = i965_get_dest_format(picture); |
||
1378 | else |
||
1379 | ss->ss0.surface_format = i965_get_card_format(picture); |
||
1380 | |||
1381 | ss->ss0.tile_walk = 0; /* Tiled X */ |
||
1382 | ss->ss0.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; |
||
1383 | ss->ss1.base_addr = priv->bo->offset; |
||
1384 | |||
1385 | ss->ss2.height = pixmap->drawable.height - 1; |
||
1386 | ss->ss2.width = pixmap->drawable.width - 1; |
||
1387 | ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; |
||
1388 | |||
1389 | if (IS_HSW(intel)) { |
||
1390 | ss->ss7.shader_chanel_select_r = HSW_SCS_RED; |
||
1391 | ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; |
||
1392 | ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; |
||
1393 | ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; |
||
1394 | } |
||
1395 | |||
1396 | dri_bo_emit_reloc(intel->surface_bo, |
||
1397 | read_domains, write_domain, |
||
1398 | 0, |
||
1399 | intel->surface_used + |
||
1400 | offsetof(struct gen7_surface_state, ss1), |
||
1401 | priv->bo); |
||
1402 | |||
1403 | offset = intel->surface_used; |
||
1404 | intel->surface_used += SURFACE_STATE_PADDED_SIZE; |
||
1405 | |||
1406 | return offset; |
||
1407 | } |
||
1408 | |||
1409 | static inline int |
||
1410 | i965_set_picture_surface_state(intel_screen_private *intel, |
||
1411 | PicturePtr picture, PixmapPtr pixmap, |
||
1412 | Bool is_dst) |
||
1413 | { |
||
1414 | if (INTEL_INFO(intel)->gen < 070) |
||
1415 | return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst); |
||
1416 | return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst); |
||
1417 | } |
||
1418 | |||
1419 | static void gen4_composite_vertex_elements(struct intel_screen_private *intel) |
||
1420 | { |
||
1421 | struct gen4_render_state *render_state = intel->gen4_render_state; |
||
1422 | gen4_composite_op *composite_op = &render_state->composite_op; |
||
1423 | Bool has_mask = intel->render_mask != NULL; |
||
1424 | Bool is_affine = composite_op->is_affine; |
||
1425 | /* |
||
1426 | * number of extra parameters per vertex |
||
1427 | */ |
||
1428 | int nelem = has_mask ? 2 : 1; |
||
1429 | /* |
||
1430 | * size of extra parameters: |
||
1431 | * 3 for homogenous (xyzw) |
||
1432 | * 2 for cartesian (xy) |
||
1433 | */ |
||
1434 | int selem = is_affine ? 2 : 3; |
||
1435 | uint32_t w_component; |
||
1436 | uint32_t src_format; |
||
1437 | int id; |
||
1438 | |||
1439 | id = has_mask << 1 | is_affine; |
||
1440 | |||
1441 | if (composite_op->vertex_id == id) |
||
1442 | return; |
||
1443 | |||
1444 | composite_op->vertex_id = id; |
||
1445 | |||
1446 | if (is_affine) { |
||
1447 | src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; |
||
1448 | w_component = BRW_VFCOMPONENT_STORE_1_FLT; |
||
1449 | } else { |
||
1450 | src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; |
||
1451 | w_component = BRW_VFCOMPONENT_STORE_SRC; |
||
1452 | } |
||
1453 | |||
1454 | if (IS_GEN5(intel)) { |
||
1455 | /* |
||
1456 | * The reason to add this extra vertex element in the header is that |
||
1457 | * Ironlake has different vertex header definition and origin method to |
||
1458 | * set destination element offset doesn't exist anymore, which means |
||
1459 | * hardware requires a predefined vertex element layout. |
||
1460 | * |
||
1461 | * haihao proposed this approach to fill the first vertex element, so |
||
1462 | * origin layout for Gen4 doesn't need to change, and origin shader |
||
1463 | * programs behavior is also kept. |
||
1464 | * |
||
1465 | * I think this is not bad. - zhenyu |
||
1466 | */ |
||
1467 | |||
1468 | OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | |
||
1469 | ((2 * (2 + nelem)) - 1)); |
||
1470 | OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | |
||
1471 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | |
||
1472 | (0 << VE0_OFFSET_SHIFT)); |
||
1473 | |||
1474 | OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | |
||
1475 | (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | |
||
1476 | (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | |
||
1477 | (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); |
||
1478 | } else { |
||
1479 | /* Set up our vertex elements, sourced from the single vertex buffer. |
||
1480 | * that will be set up later. |
||
1481 | */ |
||
1482 | OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | |
||
1483 | ((2 * (1 + nelem)) - 1)); |
||
1484 | } |
||
1485 | |||
1486 | /* x,y */ |
||
1487 | OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | |
||
1488 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | |
||
1489 | (0 << VE0_OFFSET_SHIFT)); |
||
1490 | |||
1491 | if (IS_GEN5(intel)) |
||
1492 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
1493 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
1494 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | |
||
1495 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); |
||
1496 | else |
||
1497 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
1498 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
1499 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | |
||
1500 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | |
||
1501 | (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); |
||
1502 | /* u0, v0, w0 */ |
||
1503 | OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | |
||
1504 | (src_format << VE0_FORMAT_SHIFT) | |
||
1505 | ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ |
||
1506 | |||
1507 | if (IS_GEN5(intel)) |
||
1508 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
1509 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
1510 | (w_component << VE1_VFCOMPONENT_2_SHIFT) | |
||
1511 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); |
||
1512 | else |
||
1513 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
1514 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
1515 | (w_component << VE1_VFCOMPONENT_2_SHIFT) | |
||
1516 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | |
||
1517 | ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ |
||
1518 | /* u1, v1, w1 */ |
||
1519 | if (has_mask) { |
||
1520 | OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | |
||
1521 | (src_format << VE0_FORMAT_SHIFT) | |
||
1522 | (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ |
||
1523 | |||
1524 | if (IS_GEN5(intel)) |
||
1525 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
1526 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
1527 | (w_component << VE1_VFCOMPONENT_2_SHIFT) | |
||
1528 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); |
||
1529 | else |
||
1530 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
1531 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
1532 | (w_component << VE1_VFCOMPONENT_2_SHIFT) | |
||
1533 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | |
||
1534 | ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ |
||
1535 | } |
||
1536 | } |
||
1537 | |||
1538 | static void i965_emit_composite_state(struct intel_screen_private *intel) |
||
1539 | { |
||
1540 | struct gen4_render_state *render_state = intel->gen4_render_state; |
||
1541 | gen4_composite_op *composite_op = &render_state->composite_op; |
||
1542 | int op = composite_op->op; |
||
1543 | PicturePtr mask_picture = intel->render_mask_picture; |
||
1544 | PicturePtr dest_picture = intel->render_dest_picture; |
||
1545 | PixmapPtr mask = intel->render_mask; |
||
1546 | PixmapPtr dest = intel->render_dest; |
||
1547 | sampler_state_filter_t src_filter = composite_op->src_filter; |
||
1548 | sampler_state_filter_t mask_filter = composite_op->mask_filter; |
||
1549 | sampler_state_extend_t src_extend = composite_op->src_extend; |
||
1550 | sampler_state_extend_t mask_extend = composite_op->mask_extend; |
||
1551 | uint32_t src_blend, dst_blend; |
||
1552 | |||
1553 | intel->needs_render_state_emit = FALSE; |
||
1554 | |||
1555 | /* Begin the long sequence of commands needed to set up the 3D |
||
1556 | * rendering pipe |
||
1557 | */ |
||
1558 | |||
1559 | if (intel->needs_3d_invariant) { |
||
1560 | if (IS_GEN5(intel)) { |
||
1561 | /* Ironlake errata workaround: Before disabling the clipper, |
||
1562 | * you have to MI_FLUSH to get the pipeline idle. |
||
1563 | */ |
||
1564 | OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); |
||
1565 | } |
||
1566 | |||
1567 | /* Match Mesa driver setup */ |
||
1568 | if (INTEL_INFO(intel)->gen >= 045) |
||
1569 | OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); |
||
1570 | else |
||
1571 | OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); |
||
1572 | |||
1573 | /* Set system instruction pointer */ |
||
1574 | OUT_BATCH(BRW_STATE_SIP | 0); |
||
1575 | OUT_BATCH(0); |
||
1576 | |||
1577 | intel->needs_3d_invariant = FALSE; |
||
1578 | } |
||
1579 | |||
1580 | if (intel->surface_reloc == 0) { |
||
1581 | /* Zero out the two base address registers so all offsets are |
||
1582 | * absolute. |
||
1583 | */ |
||
1584 | if (IS_GEN5(intel)) { |
||
1585 | OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); |
||
1586 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ |
||
1587 | intel->surface_reloc = intel->batch_used; |
||
1588 | intel_batch_emit_dword(intel, |
||
1589 | intel->surface_bo->offset | BASE_ADDRESS_MODIFY); |
||
1590 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ |
||
1591 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ |
||
1592 | /* general state max addr, disabled */ |
||
1593 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); |
||
1594 | /* media object state max addr, disabled */ |
||
1595 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); |
||
1596 | /* Instruction max addr, disabled */ |
||
1597 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); |
||
1598 | } else { |
||
1599 | OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); |
||
1600 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ |
||
1601 | intel->surface_reloc = intel->batch_used; |
||
1602 | intel_batch_emit_dword(intel, |
||
1603 | intel->surface_bo->offset | BASE_ADDRESS_MODIFY); |
||
1604 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ |
||
1605 | /* general state max addr, disabled */ |
||
1606 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); |
||
1607 | /* media object state max addr, disabled */ |
||
1608 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); |
||
1609 | } |
||
1610 | } |
||
1611 | |||
1612 | i965_get_blend_cntl(op, mask_picture, dest_picture->format, |
||
1613 | &src_blend, &dst_blend); |
||
1614 | |||
1615 | /* Binding table pointers */ |
||
1616 | OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); |
||
1617 | OUT_BATCH(0); /* vs */ |
||
1618 | OUT_BATCH(0); /* gs */ |
||
1619 | OUT_BATCH(0); /* clip */ |
||
1620 | OUT_BATCH(0); /* sf */ |
||
1621 | /* Only the PS uses the binding table */ |
||
1622 | OUT_BATCH(intel->surface_table); |
||
1623 | |||
1624 | /* The drawing rectangle clipping is always on. Set it to values that |
||
1625 | * shouldn't do any clipping. |
||
1626 | */ |
||
1627 | OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); |
||
1628 | OUT_BATCH(0x00000000); /* ymin, xmin */ |
||
1629 | OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | |
||
1630 | DRAW_XMAX(dest->drawable.width - 1)); /* ymax, xmax */ |
||
1631 | OUT_BATCH(0x00000000); /* yorigin, xorigin */ |
||
1632 | |||
1633 | /* skip the depth buffer */ |
||
1634 | /* skip the polygon stipple */ |
||
1635 | /* skip the polygon stipple offset */ |
||
1636 | /* skip the line stipple */ |
||
1637 | |||
1638 | /* Set the pointers to the 3d pipeline state */ |
||
1639 | OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); |
||
1640 | OUT_RELOC(render_state->vs_state_bo, |
||
1641 | I915_GEM_DOMAIN_INSTRUCTION, 0, 0); |
||
1642 | OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ |
||
1643 | OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ |
||
1644 | if (mask) { |
||
1645 | OUT_RELOC(render_state->sf_mask_state_bo, |
||
1646 | I915_GEM_DOMAIN_INSTRUCTION, 0, 0); |
||
1647 | } else { |
||
1648 | OUT_RELOC(render_state->sf_state_bo, |
||
1649 | I915_GEM_DOMAIN_INSTRUCTION, 0, 0); |
||
1650 | } |
||
1651 | |||
1652 | OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel] |
||
1653 | [src_filter][src_extend] |
||
1654 | [mask_filter][mask_extend], |
||
1655 | I915_GEM_DOMAIN_INSTRUCTION, 0, 0); |
||
1656 | |||
1657 | OUT_RELOC(render_state->cc_state_bo, |
||
1658 | I915_GEM_DOMAIN_INSTRUCTION, 0, |
||
1659 | offsetof(struct gen4_cc_unit_state, |
||
1660 | cc_state[src_blend][dst_blend])); |
||
1661 | |||
1662 | { |
||
1663 | int urb_vs_start, urb_vs_size; |
||
1664 | int urb_gs_start, urb_gs_size; |
||
1665 | int urb_clip_start, urb_clip_size; |
||
1666 | int urb_sf_start, urb_sf_size; |
||
1667 | int urb_cs_start, urb_cs_size; |
||
1668 | |||
1669 | urb_vs_start = 0; |
||
1670 | urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; |
||
1671 | urb_gs_start = urb_vs_start + urb_vs_size; |
||
1672 | urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; |
||
1673 | urb_clip_start = urb_gs_start + urb_gs_size; |
||
1674 | urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; |
||
1675 | urb_sf_start = urb_clip_start + urb_clip_size; |
||
1676 | urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; |
||
1677 | urb_cs_start = urb_sf_start + urb_sf_size; |
||
1678 | urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; |
||
1679 | |||
1680 | /* Erratum (Vol 1a, p32): |
||
1681 | * URB_FENCE must not cross a cache-line (64 bytes). |
||
1682 | */ |
||
1683 | if ((intel->batch_used & 15) > (16 - 3)) { |
||
1684 | int cnt = 16 - (intel->batch_used & 15); |
||
1685 | while (cnt--) |
||
1686 | OUT_BATCH(MI_NOOP); |
||
1687 | } |
||
1688 | |||
1689 | OUT_BATCH(BRW_URB_FENCE | |
||
1690 | UF0_CS_REALLOC | |
||
1691 | UF0_SF_REALLOC | |
||
1692 | UF0_CLIP_REALLOC | |
||
1693 | UF0_GS_REALLOC | |
||
1694 | UF0_VS_REALLOC | |
||
1695 | 1); |
||
1696 | OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | |
||
1697 | ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | |
||
1698 | ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); |
||
1699 | OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | |
||
1700 | ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); |
||
1701 | |||
1702 | /* Constant buffer state */ |
||
1703 | OUT_BATCH(BRW_CS_URB_STATE | 0); |
||
1704 | OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | |
||
1705 | (URB_CS_ENTRIES << 0)); |
||
1706 | } |
||
1707 | |||
1708 | gen4_composite_vertex_elements(intel); |
||
1709 | } |
||
1710 | |||
1711 | /** |
||
1712 | * Returns whether the current set of composite state plus vertex buffer is |
||
1713 | * expected to fit in the aperture. |
||
1714 | */ |
||
1715 | static Bool i965_composite_check_aperture(intel_screen_private *intel) |
||
1716 | { |
||
1717 | struct gen4_render_state *render_state = intel->gen4_render_state; |
||
1718 | gen4_composite_op *composite_op = &render_state->composite_op; |
||
1719 | drm_intel_bo *bo_table[] = { |
||
1720 | intel->batch_bo, |
||
1721 | intel->vertex_bo, |
||
1722 | intel->surface_bo, |
||
1723 | render_state->vs_state_bo, |
||
1724 | render_state->sf_state_bo, |
||
1725 | render_state->sf_mask_state_bo, |
||
1726 | render_state->wm_state_bo[composite_op->wm_kernel] |
||
1727 | [composite_op->src_filter] |
||
1728 | [composite_op->src_extend] |
||
1729 | [composite_op->mask_filter] |
||
1730 | [composite_op->mask_extend], |
||
1731 | render_state->cc_state_bo, |
||
1732 | }; |
||
1733 | drm_intel_bo *gen6_bo_table[] = { |
||
1734 | intel->batch_bo, |
||
1735 | intel->vertex_bo, |
||
1736 | intel->surface_bo, |
||
1737 | render_state->wm_kernel_bo[composite_op->wm_kernel], |
||
1738 | render_state->ps_sampler_state_bo[composite_op->src_filter] |
||
1739 | [composite_op->src_extend] |
||
1740 | [composite_op->mask_filter] |
||
1741 | [composite_op->mask_extend], |
||
1742 | render_state->cc_vp_bo, |
||
1743 | render_state->cc_state_bo, |
||
1744 | render_state->gen6_blend_bo, |
||
1745 | render_state->gen6_depth_stencil_bo, |
||
1746 | }; |
||
1747 | |||
1748 | if (INTEL_INFO(intel)->gen >= 060) |
||
1749 | return drm_intel_bufmgr_check_aperture_space(gen6_bo_table, |
||
1750 | ARRAY_SIZE(gen6_bo_table)) == 0; |
||
1751 | else |
||
1752 | return drm_intel_bufmgr_check_aperture_space(bo_table, |
||
1753 | ARRAY_SIZE(bo_table)) == 0; |
||
1754 | } |
||
1755 | |||
1756 | static void i965_surface_flush(struct intel_screen_private *intel) |
||
1757 | { |
||
1758 | int ret; |
||
1759 | |||
1760 | ret = drm_intel_bo_subdata(intel->surface_bo, |
||
1761 | 0, intel->surface_used, |
||
1762 | intel->surface_data); |
||
1763 | assert(ret == 0); |
||
1764 | intel->surface_used = 0; |
||
1765 | |||
1766 | assert (intel->surface_reloc != 0); |
||
1767 | drm_intel_bo_emit_reloc(intel->batch_bo, |
||
1768 | intel->surface_reloc * 4, |
||
1769 | intel->surface_bo, BASE_ADDRESS_MODIFY, |
||
1770 | I915_GEM_DOMAIN_INSTRUCTION, 0); |
||
1771 | intel->surface_reloc = 0; |
||
1772 | |||
1773 | drm_intel_bo_unreference(intel->surface_bo); |
||
1774 | intel->surface_bo = |
||
1775 | drm_intel_bo_alloc(intel->bufmgr, "surface data", |
||
1776 | sizeof(intel->surface_data), 4096); |
||
1777 | assert(intel->surface_bo); |
||
1778 | |||
1779 | return; |
||
1780 | (void)ret; |
||
1781 | } |
||
1782 | |||
1783 | static void |
||
1784 | i965_emit_composite_primitive_identity_source(intel_screen_private *intel, |
||
1785 | int srcX, int srcY, |
||
1786 | int maskX, int maskY, |
||
1787 | int dstX, int dstY, |
||
1788 | int w, int h) |
||
1789 | { |
||
1790 | OUT_VERTEX(dstX + w); |
||
1791 | OUT_VERTEX(dstY + h); |
||
1792 | OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); |
||
1793 | OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); |
||
1794 | |||
1795 | OUT_VERTEX(dstX); |
||
1796 | OUT_VERTEX(dstY + h); |
||
1797 | OUT_VERTEX(srcX * intel->scale_units[0][0]); |
||
1798 | OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); |
||
1799 | |||
1800 | OUT_VERTEX(dstX); |
||
1801 | OUT_VERTEX(dstY); |
||
1802 | OUT_VERTEX(srcX * intel->scale_units[0][0]); |
||
1803 | OUT_VERTEX(srcY * intel->scale_units[0][1]); |
||
1804 | } |
||
1805 | |||
1806 | static void |
||
1807 | i965_emit_composite_primitive_affine_source(intel_screen_private *intel, |
||
1808 | int srcX, int srcY, |
||
1809 | int maskX, int maskY, |
||
1810 | int dstX, int dstY, |
||
1811 | int w, int h) |
||
1812 | { |
||
1813 | float src_x[3], src_y[3]; |
||
1814 | |||
1815 | if (!intel_get_transformed_coordinates(srcX, srcY, |
||
1816 | intel->transform[0], |
||
1817 | &src_x[0], |
||
1818 | &src_y[0])) |
||
1819 | return; |
||
1820 | |||
1821 | if (!intel_get_transformed_coordinates(srcX, srcY + h, |
||
1822 | intel->transform[0], |
||
1823 | &src_x[1], |
||
1824 | &src_y[1])) |
||
1825 | return; |
||
1826 | |||
1827 | if (!intel_get_transformed_coordinates(srcX + w, srcY + h, |
||
1828 | intel->transform[0], |
||
1829 | &src_x[2], |
||
1830 | &src_y[2])) |
||
1831 | return; |
||
1832 | |||
1833 | OUT_VERTEX(dstX + w); |
||
1834 | OUT_VERTEX(dstY + h); |
||
1835 | OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); |
||
1836 | OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); |
||
1837 | |||
1838 | OUT_VERTEX(dstX); |
||
1839 | OUT_VERTEX(dstY + h); |
||
1840 | OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); |
||
1841 | OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); |
||
1842 | |||
1843 | OUT_VERTEX(dstX); |
||
1844 | OUT_VERTEX(dstY); |
||
1845 | OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); |
||
1846 | OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); |
||
1847 | } |
||
1848 | |||
1849 | static void |
||
1850 | i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel, |
||
1851 | int srcX, int srcY, |
||
1852 | int maskX, int maskY, |
||
1853 | int dstX, int dstY, |
||
1854 | int w, int h) |
||
1855 | { |
||
1856 | OUT_VERTEX(dstX + w); |
||
1857 | OUT_VERTEX(dstY + h); |
||
1858 | OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); |
||
1859 | OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); |
||
1860 | OUT_VERTEX((maskX + w) * intel->scale_units[1][0]); |
||
1861 | OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); |
||
1862 | |||
1863 | OUT_VERTEX(dstX); |
||
1864 | OUT_VERTEX(dstY + h); |
||
1865 | OUT_VERTEX(srcX * intel->scale_units[0][0]); |
||
1866 | OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); |
||
1867 | OUT_VERTEX(maskX * intel->scale_units[1][0]); |
||
1868 | OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); |
||
1869 | |||
1870 | OUT_VERTEX(dstX); |
||
1871 | OUT_VERTEX(dstY); |
||
1872 | OUT_VERTEX(srcX * intel->scale_units[0][0]); |
||
1873 | OUT_VERTEX(srcY * intel->scale_units[0][1]); |
||
1874 | OUT_VERTEX(maskX * intel->scale_units[1][0]); |
||
1875 | OUT_VERTEX(maskY * intel->scale_units[1][1]); |
||
1876 | } |
||
1877 | |||
1878 | static void |
||
1879 | i965_emit_composite_primitive(intel_screen_private *intel, |
||
1880 | int srcX, int srcY, |
||
1881 | int maskX, int maskY, |
||
1882 | int dstX, int dstY, |
||
1883 | int w, int h) |
||
1884 | { |
||
1885 | float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; |
||
1886 | Bool is_affine = intel->gen4_render_state->composite_op.is_affine; |
||
1887 | |||
1888 | if (is_affine) { |
||
1889 | if (!intel_get_transformed_coordinates(srcX, srcY, |
||
1890 | intel->transform[0], |
||
1891 | &src_x[0], |
||
1892 | &src_y[0])) |
||
1893 | return; |
||
1894 | |||
1895 | if (!intel_get_transformed_coordinates(srcX, srcY + h, |
||
1896 | intel->transform[0], |
||
1897 | &src_x[1], |
||
1898 | &src_y[1])) |
||
1899 | return; |
||
1900 | |||
1901 | if (!intel_get_transformed_coordinates(srcX + w, srcY + h, |
||
1902 | intel->transform[0], |
||
1903 | &src_x[2], |
||
1904 | &src_y[2])) |
||
1905 | return; |
||
1906 | } else { |
||
1907 | if (!intel_get_transformed_coordinates_3d(srcX, srcY, |
||
1908 | intel->transform[0], |
||
1909 | &src_x[0], |
||
1910 | &src_y[0], |
||
1911 | &src_w[0])) |
||
1912 | return; |
||
1913 | |||
1914 | if (!intel_get_transformed_coordinates_3d(srcX, srcY + h, |
||
1915 | intel->transform[0], |
||
1916 | &src_x[1], |
||
1917 | &src_y[1], |
||
1918 | &src_w[1])) |
||
1919 | return; |
||
1920 | |||
1921 | if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h, |
||
1922 | intel->transform[0], |
||
1923 | &src_x[2], |
||
1924 | &src_y[2], |
||
1925 | &src_w[2])) |
||
1926 | return; |
||
1927 | } |
||
1928 | |||
1929 | if (intel->render_mask) { |
||
1930 | if (is_affine) { |
||
1931 | if (!intel_get_transformed_coordinates(maskX, maskY, |
||
1932 | intel->transform[1], |
||
1933 | &mask_x[0], |
||
1934 | &mask_y[0])) |
||
1935 | return; |
||
1936 | |||
1937 | if (!intel_get_transformed_coordinates(maskX, maskY + h, |
||
1938 | intel->transform[1], |
||
1939 | &mask_x[1], |
||
1940 | &mask_y[1])) |
||
1941 | return; |
||
1942 | |||
1943 | if (!intel_get_transformed_coordinates(maskX + w, maskY + h, |
||
1944 | intel->transform[1], |
||
1945 | &mask_x[2], |
||
1946 | &mask_y[2])) |
||
1947 | return; |
||
1948 | } else { |
||
1949 | if (!intel_get_transformed_coordinates_3d(maskX, maskY, |
||
1950 | intel->transform[1], |
||
1951 | &mask_x[0], |
||
1952 | &mask_y[0], |
||
1953 | &mask_w[0])) |
||
1954 | return; |
||
1955 | |||
1956 | if (!intel_get_transformed_coordinates_3d(maskX, maskY + h, |
||
1957 | intel->transform[1], |
||
1958 | &mask_x[1], |
||
1959 | &mask_y[1], |
||
1960 | &mask_w[1])) |
||
1961 | return; |
||
1962 | |||
1963 | if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h, |
||
1964 | intel->transform[1], |
||
1965 | &mask_x[2], |
||
1966 | &mask_y[2], |
||
1967 | &mask_w[2])) |
||
1968 | return; |
||
1969 | } |
||
1970 | } |
||
1971 | |||
1972 | OUT_VERTEX(dstX + w); |
||
1973 | OUT_VERTEX(dstY + h); |
||
1974 | OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); |
||
1975 | OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); |
||
1976 | if (!is_affine) |
||
1977 | OUT_VERTEX(src_w[2]); |
||
1978 | if (intel->render_mask) { |
||
1979 | OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]); |
||
1980 | OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]); |
||
1981 | if (!is_affine) |
||
1982 | OUT_VERTEX(mask_w[2]); |
||
1983 | } |
||
1984 | |||
1985 | OUT_VERTEX(dstX); |
||
1986 | OUT_VERTEX(dstY + h); |
||
1987 | OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); |
||
1988 | OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); |
||
1989 | if (!is_affine) |
||
1990 | OUT_VERTEX(src_w[1]); |
||
1991 | if (intel->render_mask) { |
||
1992 | OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]); |
||
1993 | OUT_VERTEX(mask_y[1] * intel->scale_units[1][1]); |
||
1994 | if (!is_affine) |
||
1995 | OUT_VERTEX(mask_w[1]); |
||
1996 | } |
||
1997 | |||
1998 | OUT_VERTEX(dstX); |
||
1999 | OUT_VERTEX(dstY); |
||
2000 | OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); |
||
2001 | OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); |
||
2002 | if (!is_affine) |
||
2003 | OUT_VERTEX(src_w[0]); |
||
2004 | if (intel->render_mask) { |
||
2005 | OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]); |
||
2006 | OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]); |
||
2007 | if (!is_affine) |
||
2008 | OUT_VERTEX(mask_w[0]); |
||
2009 | } |
||
2010 | } |
||
2011 | |||
2012 | Bool |
||
2013 | i965_prepare_composite(int op, PicturePtr source_picture, |
||
2014 | PicturePtr mask_picture, PicturePtr dest_picture, |
||
2015 | PixmapPtr source, PixmapPtr mask, PixmapPtr dest) |
||
2016 | { |
||
2017 | intel_screen_private *intel = intel_get_screen_private(); |
||
2018 | struct gen4_render_state *render_state = intel->gen4_render_state; |
||
2019 | gen4_composite_op *composite_op = &render_state->composite_op; |
||
2020 | |||
2021 | composite_op->src_filter = |
||
2022 | sampler_state_filter_from_picture(source_picture->filter); |
||
2023 | if (composite_op->src_filter == SS_INVALID_FILTER) { |
||
2024 | intel_debug_fallback("Bad src filter 0x%x\n", |
||
2025 | source_picture->filter); |
||
2026 | return FALSE; |
||
2027 | } |
||
2028 | composite_op->src_extend = |
||
2029 | sampler_state_extend_from_picture(source_picture->repeatType); |
||
2030 | if (composite_op->src_extend == SS_INVALID_EXTEND) { |
||
2031 | intel_debug_fallback("Bad src repeat 0x%x\n", |
||
2032 | source_picture->repeatType); |
||
2033 | return FALSE; |
||
2034 | } |
||
2035 | |||
2036 | if (mask_picture) { |
||
2037 | if (mask_picture->componentAlpha && |
||
2038 | PICT_FORMAT_RGB(mask_picture->format)) { |
||
2039 | /* Check if it's component alpha that relies on a source alpha and on |
||
2040 | * the source value. We can only get one of those into the single |
||
2041 | * source value that we get to blend with. |
||
2042 | */ |
||
2043 | if (i965_blend_op[op].src_alpha && |
||
2044 | (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { |
||
2045 | intel_debug_fallback("Component alpha not supported " |
||
2046 | "with source alpha and source " |
||
2047 | "value blending.\n"); |
||
2048 | return FALSE; |
||
2049 | } |
||
2050 | } |
||
2051 | |||
2052 | composite_op->mask_filter = |
||
2053 | sampler_state_filter_from_picture(mask_picture->filter); |
||
2054 | if (composite_op->mask_filter == SS_INVALID_FILTER) { |
||
2055 | intel_debug_fallback("Bad mask filter 0x%x\n", |
||
2056 | mask_picture->filter); |
||
2057 | return FALSE; |
||
2058 | } |
||
2059 | composite_op->mask_extend = |
||
2060 | sampler_state_extend_from_picture(mask_picture->repeatType); |
||
2061 | if (composite_op->mask_extend == SS_INVALID_EXTEND) { |
||
2062 | intel_debug_fallback("Bad mask repeat 0x%x\n", |
||
2063 | mask_picture->repeatType); |
||
2064 | return FALSE; |
||
2065 | } |
||
2066 | } else { |
||
2067 | composite_op->mask_filter = SS_FILTER_NEAREST; |
||
2068 | composite_op->mask_extend = SS_EXTEND_NONE; |
||
2069 | } |
||
2070 | |||
2071 | /* Flush any pending writes prior to relocating the textures. */ |
||
2072 | if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask)) |
||
2073 | intel_batch_emit_flush(); |
||
2074 | |||
2075 | composite_op->op = op; |
||
2076 | intel->render_source_picture = source_picture; |
||
2077 | intel->render_mask_picture = mask_picture; |
||
2078 | intel->render_dest_picture = dest_picture; |
||
2079 | intel->render_source = source; |
||
2080 | intel->render_mask = mask; |
||
2081 | intel->render_dest = dest; |
||
2082 | |||
2083 | intel->scale_units[0][0] = 1. / source->drawable.width; |
||
2084 | intel->scale_units[0][1] = 1. / source->drawable.height; |
||
2085 | |||
2086 | intel->transform[0] = source_picture->transform; |
||
2087 | composite_op->is_affine = intel_transform_is_affine(intel->transform[0]); |
||
2088 | |||
2089 | if (mask_picture == NULL) { |
||
2090 | intel->transform[1] = NULL; |
||
2091 | intel->scale_units[1][0] = -1; |
||
2092 | intel->scale_units[1][1] = -1; |
||
2093 | } else { |
||
2094 | assert(mask != NULL); |
||
2095 | intel->transform[1] = mask_picture->transform; |
||
2096 | intel->scale_units[1][0] = 1. / mask->drawable.width; |
||
2097 | intel->scale_units[1][1] = 1. / mask->drawable.height; |
||
2098 | composite_op->is_affine &= |
||
2099 | intel_transform_is_affine(intel->transform[1]); |
||
2100 | } |
||
2101 | |||
2102 | if (mask) { |
||
2103 | if (mask_picture->componentAlpha && |
||
2104 | PICT_FORMAT_RGB(mask_picture->format)) { |
||
2105 | if (i965_blend_op[op].src_alpha) { |
||
2106 | if (composite_op->is_affine) |
||
2107 | composite_op->wm_kernel = |
||
2108 | WM_KERNEL_MASKCA_SRCALPHA_AFFINE; |
||
2109 | else |
||
2110 | composite_op->wm_kernel = |
||
2111 | WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; |
||
2112 | } else { |
||
2113 | if (composite_op->is_affine) |
||
2114 | composite_op->wm_kernel = |
||
2115 | WM_KERNEL_MASKCA_AFFINE; |
||
2116 | else |
||
2117 | composite_op->wm_kernel = |
||
2118 | WM_KERNEL_MASKCA_PROJECTIVE; |
||
2119 | } |
||
2120 | } else { |
||
2121 | if (composite_op->is_affine) |
||
2122 | composite_op->wm_kernel = |
||
2123 | WM_KERNEL_MASKNOCA_AFFINE; |
||
2124 | else |
||
2125 | composite_op->wm_kernel = |
||
2126 | WM_KERNEL_MASKNOCA_PROJECTIVE; |
||
2127 | } |
||
2128 | } else { |
||
2129 | if (composite_op->is_affine) |
||
2130 | composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; |
||
2131 | else |
||
2132 | composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; |
||
2133 | } |
||
2134 | |||
2135 | intel->prim_emit = i965_emit_composite_primitive; |
||
2136 | if (!mask) { |
||
2137 | if (intel->transform[0] == NULL) |
||
2138 | intel->prim_emit = i965_emit_composite_primitive_identity_source; |
||
2139 | else if (composite_op->is_affine) |
||
2140 | intel->prim_emit = i965_emit_composite_primitive_affine_source; |
||
2141 | } else { |
||
2142 | if (intel->transform[0] == NULL && intel->transform[1] == NULL) |
||
2143 | intel->prim_emit = i965_emit_composite_primitive_identity_source_mask; |
||
2144 | } |
||
2145 | |||
2146 | intel->floats_per_vertex = |
||
2147 | 2 + (mask ? 2 : 1) * (composite_op->is_affine ? 2: 3); |
||
2148 | |||
2149 | if (!i965_composite_check_aperture(intel)) { |
||
2150 | intel_batch_submit(); |
||
2151 | if (!i965_composite_check_aperture(intel)) { |
||
2152 | intel_debug_fallback("Couldn't fit render operation " |
||
2153 | "in aperture\n"); |
||
2154 | return FALSE; |
||
2155 | } |
||
2156 | } |
||
2157 | |||
2158 | if (sizeof(intel->surface_data) - intel->surface_used < |
||
2159 | 4 * SURFACE_STATE_PADDED_SIZE) |
||
2160 | i965_surface_flush(intel); |
||
2161 | |||
2162 | intel->needs_render_state_emit = TRUE; |
||
2163 | |||
2164 | return TRUE; |
||
2165 | } |
||
2166 | |||
2167 | static void i965_select_vertex_buffer(struct intel_screen_private *intel) |
||
2168 | { |
||
2169 | int id = intel->gen4_render_state->composite_op.vertex_id; |
||
2170 | int modifyenable = 0; |
||
2171 | |||
2172 | if (intel->vertex_id & (1 << id)) |
||
2173 | return; |
||
2174 | |||
2175 | if (INTEL_INFO(intel)->gen >= 070) |
||
2176 | modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE; |
||
2177 | |||
2178 | /* Set up the pointer to our (single) vertex buffer */ |
||
2179 | OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); |
||
2180 | |||
2181 | /* XXX could use multiple vbo to reduce relocations if |
||
2182 | * frequently switching between vertex sizes, like rgb10text. |
||
2183 | */ |
||
2184 | if (INTEL_INFO(intel)->gen >= 060) { |
||
2185 | OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) | |
||
2186 | GEN6_VB0_VERTEXDATA | |
||
2187 | modifyenable | |
||
2188 | (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); |
||
2189 | } else { |
||
2190 | OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | |
||
2191 | VB0_VERTEXDATA | |
||
2192 | (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); |
||
2193 | } |
||
2194 | OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); |
||
2195 | if (INTEL_INFO(intel)->gen >= 050) |
||
2196 | OUT_RELOC(intel->vertex_bo, |
||
2197 | I915_GEM_DOMAIN_VERTEX, 0, |
||
2198 | sizeof(intel->vertex_ptr) - 1); |
||
2199 | else |
||
2200 | OUT_BATCH(0); |
||
2201 | OUT_BATCH(0); // ignore for VERTEXDATA, but still there |
||
2202 | |||
2203 | intel->vertex_id |= 1 << id; |
||
2204 | } |
||
2205 | |||
2206 | static void i965_bind_surfaces(struct intel_screen_private *intel) |
||
2207 | { |
||
2208 | uint32_t *binding_table; |
||
2209 | |||
2210 | assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data)); |
||
2211 | |||
2212 | binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); |
||
2213 | intel->surface_table = intel->surface_used; |
||
2214 | intel->surface_used += SURFACE_STATE_PADDED_SIZE; |
||
2215 | |||
2216 | binding_table[0] = |
||
2217 | i965_set_picture_surface_state(intel, |
||
2218 | intel->render_dest_picture, |
||
2219 | intel->render_dest, |
||
2220 | TRUE); |
||
2221 | binding_table[1] = |
||
2222 | i965_set_picture_surface_state(intel, |
||
2223 | intel->render_source_picture, |
||
2224 | intel->render_source, |
||
2225 | FALSE); |
||
2226 | if (intel->render_mask) { |
||
2227 | binding_table[2] = |
||
2228 | i965_set_picture_surface_state(intel, |
||
2229 | intel->render_mask_picture, |
||
2230 | intel->render_mask, |
||
2231 | FALSE); |
||
2232 | } |
||
2233 | } |
||
2234 | |||
2235 | void |
||
2236 | i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, |
||
2237 | int dstX, int dstY, int w, int h) |
||
2238 | { |
||
2239 | intel_screen_private *intel = intel_get_screen_private(); |
||
2240 | |||
2241 | intel_batch_start_atomic(200); |
||
2242 | if (intel->needs_render_state_emit) { |
||
2243 | i965_bind_surfaces(intel); |
||
2244 | |||
2245 | if (INTEL_INFO(intel)->gen >= 060) |
||
2246 | gen6_emit_composite_state(intel); |
||
2247 | else |
||
2248 | i965_emit_composite_state(intel); |
||
2249 | } |
||
2250 | |||
2251 | if (intel->floats_per_vertex != intel->last_floats_per_vertex) { |
||
2252 | intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; |
||
2253 | intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; |
||
2254 | intel->last_floats_per_vertex = intel->floats_per_vertex; |
||
2255 | } |
||
2256 | if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { |
||
2257 | i965_vertex_flush(intel); |
||
2258 | intel_next_vertex(intel); |
||
2259 | intel->vertex_index = 0; |
||
2260 | } |
||
2261 | i965_select_vertex_buffer(intel); |
||
2262 | |||
2263 | if (intel->vertex_offset == 0) { |
||
2264 | if (INTEL_INFO(intel)->gen >= 070) { |
||
2265 | OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); |
||
2266 | OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | |
||
2267 | _3DPRIM_RECTLIST); |
||
2268 | } else { |
||
2269 | OUT_BATCH(BRW_3DPRIMITIVE | |
||
2270 | BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | |
||
2271 | (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | |
||
2272 | (0 << 9) | |
||
2273 | 4); |
||
2274 | } |
||
2275 | intel->vertex_offset = intel->batch_used; |
||
2276 | OUT_BATCH(0); /* vertex count, to be filled in later */ |
||
2277 | OUT_BATCH(intel->vertex_index); |
||
2278 | OUT_BATCH(1); /* single instance */ |
||
2279 | OUT_BATCH(0); /* start instance location */ |
||
2280 | OUT_BATCH(0); /* index buffer offset, ignored */ |
||
2281 | intel->vertex_count = intel->vertex_index; |
||
2282 | } |
||
2283 | |||
2284 | intel->prim_emit(intel, |
||
2285 | srcX, srcY, |
||
2286 | maskX, maskY, |
||
2287 | dstX, dstY, |
||
2288 | w, h); |
||
2289 | intel->vertex_index += 3; |
||
2290 | |||
2291 | if (INTEL_INFO(intel)->gen < 050) { |
||
2292 | /* XXX OMG! */ |
||
2293 | i965_vertex_flush(intel); |
||
2294 | OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); |
||
2295 | } |
||
2296 | |||
2297 | intel_batch_end_atomic(); |
||
2298 | } |
||
2299 | |||
2300 | void i965_batch_commit_notify(intel_screen_private *intel) |
||
2301 | { |
||
2302 | intel->needs_render_state_emit = TRUE; |
||
2303 | intel->needs_3d_invariant = TRUE; |
||
2304 | intel->last_floats_per_vertex = 0; |
||
2305 | intel->vertex_index = 0; |
||
2306 | |||
2307 | intel->gen4_render_state->composite_op.vertex_id = -1; |
||
2308 | |||
2309 | intel->gen6_render_state.num_sf_outputs = 0; |
||
2310 | intel->gen6_render_state.samplers = NULL; |
||
2311 | intel->gen6_render_state.blend = -1; |
||
2312 | intel->gen6_render_state.kernel = NULL; |
||
2313 | intel->gen6_render_state.drawrect = -1; |
||
2314 | |||
2315 | assert(intel->surface_reloc == 0); |
||
2316 | } |
||
2317 | |||
2318 | /** |
||
2319 | * Called at EnterVT so we can set up our offsets into the state buffer. |
||
2320 | */ |
||
2321 | void gen4_render_state_init() |
||
2322 | { |
||
2323 | ENTER(); |
||
2324 | |||
2325 | intel_screen_private *intel = intel_get_screen_private(); |
||
2326 | struct gen4_render_state *render; |
||
2327 | const struct wm_kernel_info *wm_kernels; |
||
2328 | sampler_state_filter_t src_filter; |
||
2329 | sampler_state_extend_t src_extend; |
||
2330 | sampler_state_filter_t mask_filter; |
||
2331 | sampler_state_extend_t mask_extend; |
||
2332 | drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; |
||
2333 | drm_intel_bo *border_color_bo; |
||
2334 | int m; |
||
2335 | |||
2336 | intel->needs_3d_invariant = TRUE; |
||
2337 | |||
2338 | intel->surface_bo = |
||
2339 | drm_intel_bo_alloc(intel->bufmgr, "surface data", |
||
2340 | sizeof(intel->surface_data), 4096); |
||
2341 | assert(intel->surface_bo); |
||
2342 | |||
2343 | intel->surface_used = 0; |
||
2344 | |||
2345 | if (intel->gen4_render_state == NULL) { |
||
2346 | intel->gen4_render_state = calloc(1, sizeof(*render)); |
||
2347 | assert(intel->gen4_render_state != NULL); |
||
2348 | } |
||
2349 | |||
2350 | if (INTEL_INFO(intel)->gen >= 060) |
||
2351 | return gen6_render_state_init(); |
||
2352 | |||
2353 | render = intel->gen4_render_state; |
||
2354 | render->composite_op.vertex_id = -1; |
||
2355 | |||
2356 | render->vs_state_bo = gen4_create_vs_unit_state(intel); |
||
2357 | |||
2358 | /* Set up the two SF states (one for blending with a mask, one without) */ |
||
2359 | if (IS_GEN5(intel)) { |
||
2360 | sf_kernel_bo = intel_bo_alloc_for_data(intel, |
||
2361 | sf_kernel_static_gen5, |
||
2362 | sizeof |
||
2363 | (sf_kernel_static_gen5), |
||
2364 | "sf kernel gen5"); |
||
2365 | sf_kernel_mask_bo = |
||
2366 | intel_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5, |
||
2367 | sizeof(sf_kernel_mask_static_gen5), |
||
2368 | "sf mask kernel"); |
||
2369 | } else { |
||
2370 | sf_kernel_bo = intel_bo_alloc_for_data(intel, |
||
2371 | sf_kernel_static, |
||
2372 | sizeof(sf_kernel_static), |
||
2373 | "sf kernel"); |
||
2374 | sf_kernel_mask_bo = intel_bo_alloc_for_data(intel, |
||
2375 | sf_kernel_mask_static, |
||
2376 | sizeof |
||
2377 | (sf_kernel_mask_static), |
||
2378 | "sf mask kernel"); |
||
2379 | } |
||
2380 | render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo); |
||
2381 | render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo); |
||
2382 | drm_intel_bo_unreference(sf_kernel_bo); |
||
2383 | drm_intel_bo_unreference(sf_kernel_mask_bo); |
||
2384 | |||
2385 | wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4; |
||
2386 | for (m = 0; m < KERNEL_COUNT; m++) { |
||
2387 | render->wm_kernel_bo[m] = |
||
2388 | intel_bo_alloc_for_data(intel, |
||
2389 | wm_kernels[m].data, |
||
2390 | wm_kernels[m].size, |
||
2391 | "WM kernel"); |
||
2392 | } |
||
2393 | |||
2394 | /* Set up the WM states: each filter/extend type for source and mask, per |
||
2395 | * kernel. |
||
2396 | */ |
||
2397 | border_color_bo = sampler_border_color_create(intel); |
||
2398 | for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { |
||
2399 | for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { |
||
2400 | for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { |
||
2401 | for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { |
||
2402 | drm_intel_bo *sampler_state_bo; |
||
2403 | |||
2404 | sampler_state_bo = |
||
2405 | i965_create_sampler_state(intel, |
||
2406 | src_filter, src_extend, |
||
2407 | mask_filter, mask_extend, |
||
2408 | border_color_bo); |
||
2409 | |||
2410 | for (m = 0; m < KERNEL_COUNT; m++) { |
||
2411 | render->wm_state_bo[m][src_filter][src_extend][mask_filter][mask_extend] = |
||
2412 | gen4_create_wm_state |
||
2413 | (intel, |
||
2414 | wm_kernels[m]. has_mask, |
||
2415 | render->wm_kernel_bo[m], |
||
2416 | sampler_state_bo); |
||
2417 | } |
||
2418 | drm_intel_bo_unreference(sampler_state_bo); |
||
2419 | } |
||
2420 | } |
||
2421 | } |
||
2422 | } |
||
2423 | drm_intel_bo_unreference(border_color_bo); |
||
2424 | |||
2425 | render->cc_state_bo = gen4_create_cc_unit_state(intel); |
||
2426 | |||
2427 | LEAVE(); |
||
2428 | } |
||
2429 | |||
2430 | /** |
||
2431 | * Called at LeaveVT. |
||
2432 | */ |
||
2433 | void gen4_render_state_cleanup(ScrnInfoPtr scrn) |
||
2434 | { |
||
2435 | intel_screen_private *intel = intel_get_screen_private(scrn); |
||
2436 | struct gen4_render_state *render_state = intel->gen4_render_state; |
||
2437 | int i, j, k, l, m; |
||
2438 | |||
2439 | drm_intel_bo_unreference(intel->surface_bo); |
||
2440 | drm_intel_bo_unreference(render_state->vs_state_bo); |
||
2441 | drm_intel_bo_unreference(render_state->sf_state_bo); |
||
2442 | drm_intel_bo_unreference(render_state->sf_mask_state_bo); |
||
2443 | |||
2444 | for (i = 0; i < KERNEL_COUNT; i++) |
||
2445 | drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); |
||
2446 | |||
2447 | for (i = 0; i < FILTER_COUNT; i++) |
||
2448 | for (j = 0; j < EXTEND_COUNT; j++) |
||
2449 | for (k = 0; k < FILTER_COUNT; k++) |
||
2450 | for (l = 0; l < EXTEND_COUNT; l++) |
||
2451 | for (m = 0; m < KERNEL_COUNT; m++) |
||
2452 | drm_intel_bo_unreference |
||
2453 | (render_state-> |
||
2454 | wm_state_bo[m][i][j][k] |
||
2455 | [l]); |
||
2456 | |||
2457 | for (i = 0; i < FILTER_COUNT; i++) |
||
2458 | for (j = 0; j < EXTEND_COUNT; j++) |
||
2459 | for (k = 0; k < FILTER_COUNT; k++) |
||
2460 | for (l = 0; l < EXTEND_COUNT; l++) |
||
2461 | drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]); |
||
2462 | |||
2463 | drm_intel_bo_unreference(render_state->cc_state_bo); |
||
2464 | |||
2465 | drm_intel_bo_unreference(render_state->cc_vp_bo); |
||
2466 | drm_intel_bo_unreference(render_state->gen6_blend_bo); |
||
2467 | drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo); |
||
2468 | |||
2469 | free(intel->gen4_render_state); |
||
2470 | intel->gen4_render_state = NULL; |
||
2471 | } |
||
2472 | |||
/*
 * for GEN6+
 */

/*
 * Byte stride between consecutive struct gen6_blend_state entries in the
 * gen6 blend buffer object: the structure size passed through ALIGN(..., 64).
 * NOTE(review): ALIGN is defined outside this chunk; presumably it rounds up
 * to a multiple of 64 -- confirm.
 */
#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)
||
2477 | |||
2478 | static drm_intel_bo * |
||
2479 | gen6_composite_create_cc_state(intel_screen_private *intel) |
||
2480 | { |
||
2481 | struct gen6_color_calc_state *state; |
||
2482 | drm_intel_bo *cc_bo; |
||
2483 | int ret; |
||
2484 | |||
2485 | cc_bo = drm_intel_bo_alloc(intel->bufmgr, |
||
2486 | "gen6 CC state", |
||
2487 | sizeof(*state), |
||
2488 | 4096); |
||
2489 | assert(cc_bo); |
||
2490 | |||
2491 | ret = drm_intel_bo_map(cc_bo, TRUE); |
||
2492 | assert(ret == 0); |
||
2493 | |||
2494 | state = memset(cc_bo->virtual, 0, sizeof(*state)); |
||
2495 | state->constant_r = 1.0; |
||
2496 | state->constant_g = 0.0; |
||
2497 | state->constant_b = 1.0; |
||
2498 | state->constant_a = 1.0; |
||
2499 | drm_intel_bo_unmap(cc_bo); |
||
2500 | |||
2501 | return cc_bo; |
||
2502 | (void)ret; |
||
2503 | } |
||
2504 | |||
2505 | static drm_intel_bo * |
||
2506 | gen6_composite_create_blend_state(intel_screen_private *intel) |
||
2507 | { |
||
2508 | drm_intel_bo *blend_bo; |
||
2509 | int src, dst, ret; |
||
2510 | |||
2511 | blend_bo = drm_intel_bo_alloc(intel->bufmgr, |
||
2512 | "gen6 BLEND state", |
||
2513 | BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, |
||
2514 | 4096); |
||
2515 | assert(blend_bo); |
||
2516 | |||
2517 | ret = drm_intel_bo_map(blend_bo, TRUE); |
||
2518 | assert(ret == 0); |
||
2519 | |||
2520 | memset(blend_bo->virtual, 0, blend_bo->size); |
||
2521 | for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) { |
||
2522 | for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) { |
||
2523 | uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE; |
||
2524 | struct gen6_blend_state *blend; |
||
2525 | |||
2526 | blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset); |
||
2527 | blend->blend0.dest_blend_factor = dst; |
||
2528 | blend->blend0.source_blend_factor = src; |
||
2529 | blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD; |
||
2530 | blend->blend0.blend_enable = 1; |
||
2531 | |||
2532 | blend->blend1.post_blend_clamp_enable = 1; |
||
2533 | blend->blend1.pre_blend_clamp_enable = 1; |
||
2534 | } |
||
2535 | } |
||
2536 | |||
2537 | drm_intel_bo_unmap(blend_bo); |
||
2538 | return blend_bo; |
||
2539 | (void)ret; |
||
2540 | } |
||
2541 | |||
2542 | static drm_intel_bo * |
||
2543 | gen6_composite_create_depth_stencil_state(intel_screen_private *intel) |
||
2544 | { |
||
2545 | drm_intel_bo *depth_stencil_bo; |
||
2546 | int ret; |
||
2547 | |||
2548 | depth_stencil_bo = |
||
2549 | drm_intel_bo_alloc(intel->bufmgr, |
||
2550 | "gen6 DEPTH_STENCIL state", |
||
2551 | sizeof(struct gen6_depth_stencil_state), |
||
2552 | 4096); |
||
2553 | assert(depth_stencil_bo); |
||
2554 | |||
2555 | ret = drm_intel_bo_map(depth_stencil_bo, TRUE); |
||
2556 | assert(ret == 0); |
||
2557 | |||
2558 | memset(depth_stencil_bo->virtual, 0, |
||
2559 | sizeof(struct gen6_depth_stencil_state)); |
||
2560 | drm_intel_bo_unmap(depth_stencil_bo); |
||
2561 | |||
2562 | return depth_stencil_bo; |
||
2563 | (void)ret; |
||
2564 | } |
||
2565 | |||
2566 | static void |
||
2567 | gen6_composite_state_base_address(intel_screen_private *intel) |
||
2568 | { |
||
2569 | OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); |
||
2570 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ |
||
2571 | intel->surface_reloc = intel->batch_used; |
||
2572 | intel_batch_emit_dword(intel, |
||
2573 | intel->surface_bo->offset | BASE_ADDRESS_MODIFY); |
||
2574 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ |
||
2575 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ |
||
2576 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ |
||
2577 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ |
||
2578 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ |
||
2579 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ |
||
2580 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ |
||
2581 | } |
||
2582 | |||
2583 | static void |
||
2584 | gen6_composite_cc_state_pointers(intel_screen_private *intel, |
||
2585 | uint32_t blend_offset) |
||
2586 | { |
||
2587 | struct gen4_render_state *render_state = intel->gen4_render_state; |
||
2588 | drm_intel_bo *cc_bo = NULL; |
||
2589 | drm_intel_bo *depth_stencil_bo = NULL; |
||
2590 | |||
2591 | if (intel->gen6_render_state.blend == blend_offset) |
||
2592 | return; |
||
2593 | |||
2594 | if (intel->gen6_render_state.blend == -1) { |
||
2595 | cc_bo = render_state->cc_state_bo; |
||
2596 | depth_stencil_bo = render_state->gen6_depth_stencil_bo; |
||
2597 | } |
||
2598 | if (INTEL_INFO(intel)->gen >= 070) { |
||
2599 | gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); |
||
2600 | } else { |
||
2601 | gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); |
||
2602 | } |
||
2603 | |||
2604 | intel->gen6_render_state.blend = blend_offset; |
||
2605 | } |
||
2606 | |||
2607 | static void |
||
2608 | gen6_composite_sampler_state_pointers(intel_screen_private *intel, |
||
2609 | drm_intel_bo *bo) |
||
2610 | { |
||
2611 | if (intel->gen6_render_state.samplers == bo) |
||
2612 | return; |
||
2613 | |||
2614 | intel->gen6_render_state.samplers = bo; |
||
2615 | |||
2616 | if (INTEL_INFO(intel)->gen >= 070) |
||
2617 | gen7_upload_sampler_state_pointers(intel, bo); |
||
2618 | else |
||
2619 | gen6_upload_sampler_state_pointers(intel, bo); |
||
2620 | } |
||
2621 | |||
2622 | static void |
||
2623 | gen6_composite_wm_constants(intel_screen_private *intel) |
||
2624 | { |
||
2625 | Bool ivb = INTEL_INFO(intel)->gen >= 070; |
||
2626 | /* disable WM constant buffer */ |
||
2627 | OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2)); |
||
2628 | OUT_BATCH(0); |
||
2629 | OUT_BATCH(0); |
||
2630 | OUT_BATCH(0); |
||
2631 | OUT_BATCH(0); |
||
2632 | if (ivb) { |
||
2633 | OUT_BATCH(0); |
||
2634 | OUT_BATCH(0); |
||
2635 | } |
||
2636 | } |
||
2637 | |||
2638 | static void |
||
2639 | gen6_composite_sf_state(intel_screen_private *intel, |
||
2640 | Bool has_mask) |
||
2641 | { |
||
2642 | int num_sf_outputs = has_mask ? 2 : 1; |
||
2643 | |||
2644 | if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs) |
||
2645 | return; |
||
2646 | |||
2647 | intel->gen6_render_state.num_sf_outputs = num_sf_outputs; |
||
2648 | |||
2649 | if (INTEL_INFO(intel)->gen >= 070) |
||
2650 | gen7_upload_sf_state(intel, num_sf_outputs, 1); |
||
2651 | else |
||
2652 | gen6_upload_sf_state(intel, num_sf_outputs, 1); |
||
2653 | } |
||
2654 | |||
2655 | static void |
||
2656 | gen6_composite_wm_state(intel_screen_private *intel, |
||
2657 | Bool has_mask, |
||
2658 | drm_intel_bo *bo) |
||
2659 | { |
||
2660 | int num_surfaces = has_mask ? 3 : 2; |
||
2661 | int num_sf_outputs = has_mask ? 2 : 1; |
||
2662 | |||
2663 | if (intel->gen6_render_state.kernel == bo) |
||
2664 | return; |
||
2665 | |||
2666 | intel->gen6_render_state.kernel = bo; |
||
2667 | |||
2668 | OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); |
||
2669 | OUT_RELOC(bo, |
||
2670 | I915_GEM_DOMAIN_INSTRUCTION, 0, |
||
2671 | 0); |
||
2672 | OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | |
||
2673 | (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); |
||
2674 | OUT_BATCH(0); |
||
2675 | OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ |
||
2676 | OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | |
||
2677 | GEN6_3DSTATE_WM_DISPATCH_ENABLE | |
||
2678 | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); |
||
2679 | OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | |
||
2680 | GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); |
||
2681 | OUT_BATCH(0); |
||
2682 | OUT_BATCH(0); |
||
2683 | } |
||
2684 | |||
2685 | static void |
||
2686 | gen7_composite_wm_state(intel_screen_private *intel, |
||
2687 | Bool has_mask, |
||
2688 | drm_intel_bo *bo) |
||
2689 | { |
||
2690 | int num_surfaces = has_mask ? 3 : 2; |
||
2691 | unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; |
||
2692 | unsigned int num_samples = 0; |
||
2693 | |||
2694 | if (IS_HSW(intel)) { |
||
2695 | max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; |
||
2696 | num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; |
||
2697 | } |
||
2698 | |||
2699 | if (intel->gen6_render_state.kernel == bo) |
||
2700 | return; |
||
2701 | |||
2702 | intel->gen6_render_state.kernel = bo; |
||
2703 | |||
2704 | OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); |
||
2705 | OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | |
||
2706 | GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); |
||
2707 | OUT_BATCH(0); |
||
2708 | |||
2709 | OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); |
||
2710 | OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); |
||
2711 | OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | |
||
2712 | (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); |
||
2713 | OUT_BATCH(0); /* scratch space base offset */ |
||
2714 | OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples | |
||
2715 | GEN7_PS_ATTRIBUTE_ENABLE | |
||
2716 | GEN7_PS_16_DISPATCH_ENABLE); |
||
2717 | OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); |
||
2718 | OUT_BATCH(0); /* kernel 1 pointer */ |
||
2719 | OUT_BATCH(0); /* kernel 2 pointer */ |
||
2720 | } |
||
2721 | |||
2722 | |||
2723 | static void |
||
2724 | gen6_composite_drawing_rectangle(intel_screen_private *intel, |
||
2725 | PixmapPtr dest) |
||
2726 | { |
||
2727 | uint32_t dw = |
||
2728 | DRAW_YMAX(dest->drawable.height - 1) | |
||
2729 | DRAW_XMAX(dest->drawable.width - 1); |
||
2730 | |||
2731 | /* XXX cacomposite depends upon the implicit non-pipelined flush */ |
||
2732 | if (0 && intel->gen6_render_state.drawrect == dw) |
||
2733 | return; |
||
2734 | intel->gen6_render_state.drawrect = dw; |
||
2735 | |||
2736 | OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); |
||
2737 | OUT_BATCH(0x00000000); /* ymin, xmin */ |
||
2738 | OUT_BATCH(dw); /* ymax, xmax */ |
||
2739 | OUT_BATCH(0x00000000); /* yorigin, xorigin */ |
||
2740 | } |
||
2741 | |||
2742 | static void |
||
2743 | gen6_composite_vertex_element_state(intel_screen_private *intel, |
||
2744 | Bool has_mask, |
||
2745 | Bool is_affine) |
||
2746 | { |
||
2747 | /* |
||
2748 | * vertex data in vertex buffer |
||
2749 | * position: (x, y) |
||
2750 | * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) |
||
2751 | * texture coordinate 1 if (has_mask is TRUE): same as above |
||
2752 | */ |
||
2753 | gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op; |
||
2754 | int nelem = has_mask ? 2 : 1; |
||
2755 | int selem = is_affine ? 2 : 3; |
||
2756 | uint32_t w_component; |
||
2757 | uint32_t src_format; |
||
2758 | int id; |
||
2759 | |||
2760 | id = has_mask << 1 | is_affine; |
||
2761 | |||
2762 | if (composite_op->vertex_id == id) |
||
2763 | return; |
||
2764 | |||
2765 | composite_op->vertex_id = id; |
||
2766 | |||
2767 | if (is_affine) { |
||
2768 | src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; |
||
2769 | w_component = BRW_VFCOMPONENT_STORE_1_FLT; |
||
2770 | } else { |
||
2771 | src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; |
||
2772 | w_component = BRW_VFCOMPONENT_STORE_SRC; |
||
2773 | } |
||
2774 | |||
2775 | /* The VUE layout |
||
2776 | * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) |
||
2777 | * dword 4-7: position (x, y, 1.0, 1.0), |
||
2778 | * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) |
||
2779 | * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) |
||
2780 | * |
||
2781 | * dword 4-15 are fetched from vertex buffer |
||
2782 | */ |
||
2783 | OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | |
||
2784 | ((2 * (2 + nelem)) + 1 - 2)); |
||
2785 | |||
2786 | OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | |
||
2787 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | |
||
2788 | (0 << VE0_OFFSET_SHIFT)); |
||
2789 | OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | |
||
2790 | (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | |
||
2791 | (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | |
||
2792 | (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); |
||
2793 | |||
2794 | /* x,y */ |
||
2795 | OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | |
||
2796 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | |
||
2797 | (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */ |
||
2798 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
2799 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
2800 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | |
||
2801 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); |
||
2802 | |||
2803 | /* u0, v0, w0 */ |
||
2804 | OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | |
||
2805 | (src_format << VE0_FORMAT_SHIFT) | |
||
2806 | ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ |
||
2807 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
2808 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
2809 | (w_component << VE1_VFCOMPONENT_2_SHIFT) | |
||
2810 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); |
||
2811 | |||
2812 | /* u1, v1, w1 */ |
||
2813 | if (has_mask) { |
||
2814 | OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | |
||
2815 | GEN6_VE0_VALID | |
||
2816 | (src_format << VE0_FORMAT_SHIFT) | |
||
2817 | (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ |
||
2818 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | |
||
2819 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | |
||
2820 | (w_component << VE1_VFCOMPONENT_2_SHIFT) | |
||
2821 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); |
||
2822 | } |
||
2823 | } |
||
2824 | |||
2825 | static void |
||
2826 | gen6_emit_composite_state(struct intel_screen_private *intel) |
||
2827 | { |
||
2828 | struct gen4_render_state *render = intel->gen4_render_state; |
||
2829 | gen4_composite_op *composite_op = &render->composite_op; |
||
2830 | sampler_state_filter_t src_filter = composite_op->src_filter; |
||
2831 | sampler_state_filter_t mask_filter = composite_op->mask_filter; |
||
2832 | sampler_state_extend_t src_extend = composite_op->src_extend; |
||
2833 | sampler_state_extend_t mask_extend = composite_op->mask_extend; |
||
2834 | Bool is_affine = composite_op->is_affine; |
||
2835 | Bool has_mask = intel->render_mask != NULL; |
||
2836 | Bool ivb = INTEL_INFO(intel)->gen >= 070; |
||
2837 | uint32_t src, dst; |
||
2838 | drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend]; |
||
2839 | |||
2840 | intel->needs_render_state_emit = FALSE; |
||
2841 | if (intel->needs_3d_invariant) { |
||
2842 | gen6_upload_invariant_states(intel); |
||
2843 | |||
2844 | if (ivb) { |
||
2845 | gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo); |
||
2846 | gen7_upload_urb(intel); |
||
2847 | gen7_upload_bypass_states(intel); |
||
2848 | gen7_upload_depth_buffer_state(intel); |
||
2849 | } else { |
||
2850 | gen6_upload_invariant_states(intel); |
||
2851 | gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo); |
||
2852 | gen6_upload_urb(intel); |
||
2853 | |||
2854 | gen6_upload_gs_state(intel); |
||
2855 | gen6_upload_depth_buffer_state(intel); |
||
2856 | } |
||
2857 | gen6_composite_wm_constants(intel); |
||
2858 | gen6_upload_vs_state(intel); |
||
2859 | gen6_upload_clip_state(intel); |
||
2860 | |||
2861 | intel->needs_3d_invariant = FALSE; |
||
2862 | } |
||
2863 | |||
2864 | i965_get_blend_cntl(composite_op->op, |
||
2865 | intel->render_mask_picture, |
||
2866 | intel->render_dest_picture->format, |
||
2867 | &src, &dst); |
||
2868 | |||
2869 | if (intel->surface_reloc == 0) |
||
2870 | gen6_composite_state_base_address(intel); |
||
2871 | |||
2872 | gen6_composite_cc_state_pointers(intel, |
||
2873 | (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE); |
||
2874 | gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo); |
||
2875 | gen6_composite_sf_state(intel, has_mask); |
||
2876 | if (ivb) { |
||
2877 | gen7_composite_wm_state(intel, has_mask, |
||
2878 | render->wm_kernel_bo[composite_op->wm_kernel]); |
||
2879 | gen7_upload_binding_table(intel, intel->surface_table); |
||
2880 | } else { |
||
2881 | gen6_composite_wm_state(intel, has_mask, |
||
2882 | render->wm_kernel_bo[composite_op->wm_kernel]); |
||
2883 | gen6_upload_binding_table(intel, intel->surface_table); |
||
2884 | } |
||
2885 | gen6_composite_drawing_rectangle(intel, intel->render_dest); |
||
2886 | gen6_composite_vertex_element_state(intel, has_mask, is_affine); |
||
2887 | } |
||
2888 | |||
2889 | static void |
||
2890 | gen6_render_state_init() |
||
2891 | { |
||
2892 | ENTER(); |
||
2893 | |||
2894 | intel_screen_private *intel = intel_get_screen_private(); |
||
2895 | struct gen4_render_state *render; |
||
2896 | sampler_state_filter_t src_filter; |
||
2897 | sampler_state_filter_t mask_filter; |
||
2898 | sampler_state_extend_t src_extend; |
||
2899 | sampler_state_extend_t mask_extend; |
||
2900 | int m; |
||
2901 | drm_intel_bo *border_color_bo; |
||
2902 | const struct wm_kernel_info *wm_kernels; |
||
2903 | |||
2904 | render= intel->gen4_render_state; |
||
2905 | render->composite_op.vertex_id = -1; |
||
2906 | |||
2907 | intel->gen6_render_state.num_sf_outputs = 0; |
||
2908 | intel->gen6_render_state.samplers = NULL; |
||
2909 | intel->gen6_render_state.blend = -1; |
||
2910 | intel->gen6_render_state.kernel = NULL; |
||
2911 | intel->gen6_render_state.drawrect = -1; |
||
2912 | |||
2913 | wm_kernels = IS_GEN7(intel) ? wm_kernels_gen7 : wm_kernels_gen6; |
||
2914 | for (m = 0; m < KERNEL_COUNT; m++) { |
||
2915 | render->wm_kernel_bo[m] = |
||
2916 | intel_bo_alloc_for_data(intel, |
||
2917 | wm_kernels[m].data, |
||
2918 | wm_kernels[m].size, |
||
2919 | "WM kernel gen6/7"); |
||
2920 | } |
||
2921 | |||
2922 | border_color_bo = sampler_border_color_create(intel); |
||
2923 | |||
2924 | for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { |
||
2925 | for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { |
||
2926 | for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { |
||
2927 | for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { |
||
2928 | render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend] = |
||
2929 | i965_create_sampler_state(intel, |
||
2930 | src_filter, src_extend, |
||
2931 | mask_filter, mask_extend, |
||
2932 | border_color_bo); |
||
2933 | } |
||
2934 | } |
||
2935 | } |
||
2936 | } |
||
2937 | |||
2938 | drm_intel_bo_unreference(border_color_bo); |
||
2939 | render->cc_vp_bo = gen4_create_cc_viewport(intel); |
||
2940 | render->cc_state_bo = gen6_composite_create_cc_state(intel); |
||
2941 | render->gen6_blend_bo = gen6_composite_create_blend_state(intel); |
||
2942 | render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel); |
||
2943 | |||
2944 | LEAVE(); |
||
2945 | } |
||
2946 | |||
2947 | void i965_vertex_flush(struct intel_screen_private *intel) |
||
2948 | { |
||
2949 | if (intel->vertex_offset) { |
||
2950 | intel->batch_ptr[intel->vertex_offset] = |
||
2951 | intel->vertex_index - intel->vertex_count; |
||
2952 | intel->vertex_offset = 0; |
||
2953 | } |
||
2954 | } |
||
2955 | |||
2956 | void i965_batch_flush(struct intel_screen_private *intel) |
||
2957 | { |
||
2958 | if (intel->surface_used) |
||
2959 | i965_surface_flush(intel); |
||
2960 | } |
||
2961 | |||
2962 | |||
/*
 * Definition of uxa_pixmap_index. Its type depends on the build:
 * DevPrivateKeyRec when HAS_DEVPRIVATEKEYREC is non-zero, plain int
 * otherwise.
 * NOTE(review): presumably the private key/index under which per-pixmap
 * UXA data is stored; its users are outside this chunk -- confirm.
 */
#if HAS_DEVPRIVATEKEYREC
DevPrivateKeyRec uxa_pixmap_index;
#else
int uxa_pixmap_index;
#endif
||
2968 | |||
/*
 * Converts a fixed-point value to floating point: the integer part
 * (xFixedToInt) plus the fractional part (xFixedFrac) divided by 65536.
 *
 * val is expanded twice, so it must not have side effects. Because of the
 * 65536.0 literal the expression as a whole has type double.
 */
#define xFixedToFloat(val) \
	((float)xFixedToInt(val) + ((float)xFixedFrac(val) / 65536.0))
||
2971 | |||
2972 | static Bool |
||
2973 | _intel_transform_point(PictTransformPtr transform, |
||
2974 | float x, float y, float result[3]) |
||
2975 | { |
||
2976 | int j; |
||
2977 | |||
2978 | for (j = 0; j < 3; j++) { |
||
2979 | result[j] = (xFixedToFloat(transform->matrix[j][0]) * x + |
||
2980 | xFixedToFloat(transform->matrix[j][1]) * y + |
||
2981 | xFixedToFloat(transform->matrix[j][2])); |
||
2982 | } |
||
2983 | if (!result[2]) |
||
2984 | return FALSE; |
||
2985 | return TRUE; |
||
2986 | } |
||
2987 | |||
2988 | /** |
||
2989 | * Returns the floating-point coordinates transformed by the given transform. |
||
2990 | * |
||
2991 | * transform may be null. |
||
2992 | */ |
||
2993 | Bool |
||
2994 | intel_get_transformed_coordinates(int x, int y, PictTransformPtr transform, |
||
2995 | float *x_out, float *y_out) |
||
2996 | { |
||
2997 | if (transform == NULL) { |
||
2998 | *x_out = x; |
||
2999 | *y_out = y; |
||
3000 | } else { |
||
3001 | float result[3]; |
||
3002 | |||
3003 | if (!_intel_transform_point(transform, |
||
3004 | x, y, |
||
3005 | result)) |
||
3006 | return FALSE; |
||
3007 | *x_out = result[0] / result[2]; |
||
3008 | *y_out = result[1] / result[2]; |
||
3009 | } |
||
3010 | return TRUE; |
||
3011 | } |
||
3012 | |||
3013 | /** |
||
3014 | * Returns the un-normalized floating-point coordinates transformed by the given transform. |
||
3015 | * |
||
3016 | * transform may be null. |
||
3017 | */ |
||
3018 | Bool |
||
3019 | intel_get_transformed_coordinates_3d(int x, int y, PictTransformPtr transform, |
||
3020 | float *x_out, float *y_out, float *w_out) |
||
3021 | { |
||
3022 | if (transform == NULL) { |
||
3023 | *x_out = x; |
||
3024 | *y_out = y; |
||
3025 | *w_out = 1; |
||
3026 | } else { |
||
3027 | float result[3]; |
||
3028 | |||
3029 | if (!_intel_transform_point(transform, |
||
3030 | x, y, |
||
3031 | result)) |
||
3032 | return FALSE; |
||
3033 | *x_out = result[0]; |
||
3034 | *y_out = result[1]; |
||
3035 | *w_out = result[2]; |
||
3036 | } |
||
3037 | return TRUE; |
||
3038 | } |
||
3039 | |||
3040 | /** |
||
3041 | * Returns whether the provided transform is affine. |
||
3042 | * |
||
3043 | * transform may be null. |
||
3044 | */ |
||
3045 | Bool intel_transform_is_affine(PictTransformPtr t) |
||
3046 | { |
||
3047 | if (t == NULL) |
||
3048 | return TRUE; |
||
3049 | return t->matrix[2][0] == 0 && t->matrix[2][1] == 0; |
||
3050 | }>>>>>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>>>>>>>>>>>>>>>>>>>><>><>>=>><>><>><>><>><>><> |