Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3770 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2010 Christian König |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | #include <assert.h> |
||
29 | |||
30 | #include "pipe/p_context.h" |
||
31 | #include "pipe/p_screen.h" |
||
32 | |||
33 | #include "util/u_draw.h" |
||
34 | #include "util/u_sampler.h" |
||
35 | #include "util/u_memory.h" |
||
36 | |||
37 | #include "tgsi/tgsi_ureg.h" |
||
38 | |||
39 | #include "vl_defines.h" |
||
40 | #include "vl_types.h" |
||
41 | #include "vl_vertex_buffers.h" |
||
42 | #include "vl_idct.h" |
||
43 | |||
/**
 * Output slot numbers used by the IDCT vertex shaders.
 *
 * VS_O_VPOS is declared with TGSI_SEMANTIC_POSITION while the address
 * slots are declared with TGSI_SEMANTIC_GENERIC, which is why both
 * VS_O_VPOS and VS_O_L_ADDR0 carry the index 0.
 */
enum VS_OUTPUT
{
   VS_O_VPOS    = 0,
   VS_O_L_ADDR0 = 0,
   VS_O_L_ADDR1 = 1,
   VS_O_R_ADDR0 = 2,
   VS_O_R_ADDR1 = 3
};
||
52 | |||
/**
 * 8x8 DCT matrix, each entry stored as the raw bit pattern of a 32 bit
 * float. The values correspond to:
 *
 *   for (i = 0; i < 8; ++i)
 *      for (j = 0; j < 8; ++j)
 *         if (i == 0)
 *            const_matrix[i][j] = 1.0f / sqrtf(8.0f);
 *         else
 *            const_matrix[i][j] = sqrtf(2.0f / 8.0f) *
 *                                 cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
 */
static const uint32_t const_matrix[8][8] = {
   /* row 0 */
   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3,
     0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
   /* row 1 */
   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4,
     0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
   /* row 2 */
   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e,
     0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
   /* row 3 */
   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd,
     0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
   /* row 4 */
   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1,
     0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
   /* row 5 */
   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32,
     0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
   /* row 6 */
   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07,
     0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
   /* row 7 */
   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0,
     0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
};
||
70 | |||
71 | static void |
||
72 | calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], |
||
73 | struct ureg_src tc, struct ureg_src start, bool right_side, |
||
74 | bool transposed, float size) |
||
75 | { |
||
76 | unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; |
||
77 | unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; |
||
78 | |||
79 | unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; |
||
80 | unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; |
||
81 | |||
82 | /* |
||
83 | * addr[0..1].(start) = right_side ? start.x : tc.x |
||
84 | * addr[0..1].(tc) = right_side ? tc.y : start.y |
||
85 | * addr[0..1].z = tc.z |
||
86 | * addr[1].(start) += 1.0f / scale |
||
87 | */ |
||
88 | ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); |
||
89 | ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); |
||
90 | |||
91 | ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); |
||
92 | ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); |
||
93 | } |
||
94 | |||
95 | static void |
||
96 | increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], |
||
97 | struct ureg_src saddr[2], bool right_side, bool transposed, |
||
98 | int pos, float size) |
||
99 | { |
||
100 | unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; |
||
101 | unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; |
||
102 | |||
103 | /* |
||
104 | * daddr[0..1].(start) = saddr[0..1].(start) |
||
105 | * daddr[0..1].(tc) = saddr[0..1].(tc) |
||
106 | */ |
||
107 | |||
108 | ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); |
||
109 | ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); |
||
110 | ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); |
||
111 | ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); |
||
112 | } |
||
113 | |||
114 | static void |
||
115 | fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], |
||
116 | struct ureg_src sampler, bool resource3d) |
||
117 | { |
||
118 | ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler); |
||
119 | ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler); |
||
120 | } |
||
121 | |||
122 | static void |
||
123 | matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) |
||
124 | { |
||
125 | struct ureg_dst tmp; |
||
126 | |||
127 | tmp = ureg_DECL_temporary(shader); |
||
128 | |||
129 | /* |
||
130 | * tmp.xy = dot4(m[0][0..1], m[1][0..1]) |
||
131 | * dst = tmp.x + tmp.y |
||
132 | */ |
||
133 | ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); |
||
134 | ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); |
||
135 | ureg_ADD(shader, dst, |
||
136 | ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), |
||
137 | ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); |
||
138 | |||
139 | ureg_release_temporary(shader, tmp); |
||
140 | } |
||
141 | |||
142 | static void * |
||
143 | create_mismatch_vert_shader(struct vl_idct *idct) |
||
144 | { |
||
145 | struct ureg_program *shader; |
||
146 | struct ureg_src vpos; |
||
147 | struct ureg_src scale; |
||
148 | struct ureg_dst t_tex; |
||
149 | struct ureg_dst o_vpos, o_addr[2]; |
||
150 | |||
151 | shader = ureg_create(TGSI_PROCESSOR_VERTEX); |
||
152 | if (!shader) |
||
153 | return NULL; |
||
154 | |||
155 | vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); |
||
156 | |||
157 | t_tex = ureg_DECL_temporary(shader); |
||
158 | |||
159 | o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); |
||
160 | |||
161 | o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); |
||
162 | o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); |
||
163 | |||
164 | /* |
||
165 | * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) |
||
166 | * |
||
167 | * t_vpos = vpos + 7 / VL_BLOCK_WIDTH |
||
168 | * o_vpos.xy = t_vpos * scale |
||
169 | * |
||
170 | * o_addr = calc_addr(...) |
||
171 | * |
||
172 | */ |
||
173 | |||
174 | scale = ureg_imm2f(shader, |
||
175 | (float)VL_BLOCK_WIDTH / idct->buffer_width, |
||
176 | (float)VL_BLOCK_HEIGHT / idct->buffer_height); |
||
177 | |||
178 | ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); |
||
179 | ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); |
||
180 | |||
181 | ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale); |
||
182 | calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); |
||
183 | |||
184 | ureg_release_temporary(shader, t_tex); |
||
185 | |||
186 | ureg_END(shader); |
||
187 | |||
188 | return ureg_create_shader_and_destroy(shader, idct->pipe); |
||
189 | } |
||
190 | |||
191 | static void * |
||
192 | create_mismatch_frag_shader(struct vl_idct *idct) |
||
193 | { |
||
194 | struct ureg_program *shader; |
||
195 | |||
196 | struct ureg_src addr[2]; |
||
197 | |||
198 | struct ureg_dst m[8][2]; |
||
199 | struct ureg_dst fragment; |
||
200 | |||
201 | unsigned i; |
||
202 | |||
203 | shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); |
||
204 | if (!shader) |
||
205 | return NULL; |
||
206 | |||
207 | addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); |
||
208 | addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); |
||
209 | |||
210 | fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); |
||
211 | |||
212 | for (i = 0; i < 8; ++i) { |
||
213 | m[i][0] = ureg_DECL_temporary(shader); |
||
214 | m[i][1] = ureg_DECL_temporary(shader); |
||
215 | } |
||
216 | |||
217 | for (i = 0; i < 8; ++i) { |
||
218 | increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height); |
||
219 | } |
||
220 | |||
221 | for (i = 0; i < 8; ++i) { |
||
222 | struct ureg_src s_addr[2]; |
||
223 | s_addr[0] = ureg_src(m[i][0]); |
||
224 | s_addr[1] = ureg_src(m[i][1]); |
||
225 | fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false); |
||
226 | } |
||
227 | |||
228 | for (i = 1; i < 8; ++i) { |
||
229 | ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0])); |
||
230 | ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1])); |
||
231 | } |
||
232 | |||
233 | ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1])); |
||
234 | ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14)); |
||
235 | |||
236 | ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14)); |
||
237 | ureg_FRC(shader, m[0][0], ureg_src(m[0][0])); |
||
238 | ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0]))); |
||
239 | |||
240 | ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])), |
||
241 | ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15))); |
||
242 | ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]), |
||
243 | ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X)); |
||
244 | |||
245 | ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); |
||
246 | ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); |
||
247 | |||
248 | for (i = 0; i < 8; ++i) { |
||
249 | ureg_release_temporary(shader, m[i][0]); |
||
250 | ureg_release_temporary(shader, m[i][1]); |
||
251 | } |
||
252 | |||
253 | ureg_END(shader); |
||
254 | |||
255 | return ureg_create_shader_and_destroy(shader, idct->pipe); |
||
256 | } |
||
257 | |||
258 | static void * |
||
259 | create_stage1_vert_shader(struct vl_idct *idct) |
||
260 | { |
||
261 | struct ureg_program *shader; |
||
262 | struct ureg_src vrect, vpos; |
||
263 | struct ureg_src scale; |
||
264 | struct ureg_dst t_tex, t_start; |
||
265 | struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; |
||
266 | |||
267 | shader = ureg_create(TGSI_PROCESSOR_VERTEX); |
||
268 | if (!shader) |
||
269 | return NULL; |
||
270 | |||
271 | vrect = ureg_DECL_vs_input(shader, VS_I_RECT); |
||
272 | vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); |
||
273 | |||
274 | t_tex = ureg_DECL_temporary(shader); |
||
275 | t_start = ureg_DECL_temporary(shader); |
||
276 | |||
277 | o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); |
||
278 | |||
279 | o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); |
||
280 | o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); |
||
281 | |||
282 | o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); |
||
283 | o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); |
||
284 | |||
285 | /* |
||
286 | * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) |
||
287 | * |
||
288 | * t_vpos = vpos + vrect |
||
289 | * o_vpos.xy = t_vpos * scale |
||
290 | * o_vpos.zw = vpos |
||
291 | * |
||
292 | * o_l_addr = calc_addr(...) |
||
293 | * o_r_addr = calc_addr(...) |
||
294 | * |
||
295 | */ |
||
296 | |||
297 | scale = ureg_imm2f(shader, |
||
298 | (float)VL_BLOCK_WIDTH / idct->buffer_width, |
||
299 | (float)VL_BLOCK_HEIGHT / idct->buffer_height); |
||
300 | |||
301 | ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); |
||
302 | ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); |
||
303 | |||
304 | ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); |
||
305 | ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); |
||
306 | |||
307 | ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); |
||
308 | |||
309 | calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); |
||
310 | calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4); |
||
311 | |||
312 | ureg_release_temporary(shader, t_tex); |
||
313 | ureg_release_temporary(shader, t_start); |
||
314 | |||
315 | ureg_END(shader); |
||
316 | |||
317 | return ureg_create_shader_and_destroy(shader, idct->pipe); |
||
318 | } |
||
319 | |||
320 | static void * |
||
321 | create_stage1_frag_shader(struct vl_idct *idct) |
||
322 | { |
||
323 | struct ureg_program *shader; |
||
324 | |||
325 | struct ureg_src l_addr[2], r_addr[2]; |
||
326 | |||
327 | struct ureg_dst l[4][2], r[2]; |
||
328 | struct ureg_dst *fragment; |
||
329 | |||
330 | int i, j; |
||
331 | |||
332 | shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); |
||
333 | if (!shader) |
||
334 | return NULL; |
||
335 | |||
336 | fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst)); |
||
337 | |||
338 | l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); |
||
339 | l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); |
||
340 | |||
341 | r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); |
||
342 | r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); |
||
343 | |||
344 | for (i = 0; i < idct->nr_of_render_targets; ++i) |
||
345 | fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); |
||
346 | |||
347 | for (i = 0; i < 4; ++i) { |
||
348 | l[i][0] = ureg_DECL_temporary(shader); |
||
349 | l[i][1] = ureg_DECL_temporary(shader); |
||
350 | } |
||
351 | |||
352 | r[0] = ureg_DECL_temporary(shader); |
||
353 | r[1] = ureg_DECL_temporary(shader); |
||
354 | |||
355 | for (i = 0; i < 4; ++i) { |
||
356 | increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height); |
||
357 | } |
||
358 | |||
359 | for (i = 0; i < 4; ++i) { |
||
360 | struct ureg_src s_addr[2]; |
||
361 | s_addr[0] = ureg_src(l[i][0]); |
||
362 | s_addr[1] = ureg_src(l[i][1]); |
||
363 | fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false); |
||
364 | } |
||
365 | |||
366 | for (i = 0; i < idct->nr_of_render_targets; ++i) { |
||
367 | struct ureg_src s_addr[2]; |
||
368 | |||
369 | increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT); |
||
370 | |||
371 | s_addr[0] = ureg_src(r[0]); |
||
372 | s_addr[1] = ureg_src(r[1]); |
||
373 | fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false); |
||
374 | |||
375 | for (j = 0; j < 4; ++j) { |
||
376 | matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); |
||
377 | } |
||
378 | } |
||
379 | |||
380 | for (i = 0; i < 4; ++i) { |
||
381 | ureg_release_temporary(shader, l[i][0]); |
||
382 | ureg_release_temporary(shader, l[i][1]); |
||
383 | } |
||
384 | ureg_release_temporary(shader, r[0]); |
||
385 | ureg_release_temporary(shader, r[1]); |
||
386 | |||
387 | ureg_END(shader); |
||
388 | |||
389 | FREE(fragment); |
||
390 | |||
391 | return ureg_create_shader_and_destroy(shader, idct->pipe); |
||
392 | } |
||
393 | |||
394 | void |
||
395 | vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, |
||
396 | unsigned first_output, struct ureg_dst tex) |
||
397 | { |
||
398 | struct ureg_src vrect, vpos; |
||
399 | struct ureg_src scale; |
||
400 | struct ureg_dst t_start; |
||
401 | struct ureg_dst o_l_addr[2], o_r_addr[2]; |
||
402 | |||
403 | vrect = ureg_DECL_vs_input(shader, VS_I_RECT); |
||
404 | vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); |
||
405 | |||
406 | t_start = ureg_DECL_temporary(shader); |
||
407 | |||
408 | --first_output; |
||
409 | |||
410 | o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); |
||
411 | o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); |
||
412 | |||
413 | o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); |
||
414 | o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); |
||
415 | |||
416 | scale = ureg_imm2f(shader, |
||
417 | (float)VL_BLOCK_WIDTH / idct->buffer_width, |
||
418 | (float)VL_BLOCK_HEIGHT / idct->buffer_height); |
||
419 | |||
420 | ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), |
||
421 | ureg_scalar(vrect, TGSI_SWIZZLE_X), |
||
422 | ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets)); |
||
423 | ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); |
||
424 | |||
425 | calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4); |
||
426 | calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); |
||
427 | |||
428 | ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); |
||
429 | ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex)); |
||
430 | } |
||
431 | |||
432 | void |
||
433 | vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, |
||
434 | unsigned first_input, struct ureg_dst fragment) |
||
435 | { |
||
436 | struct ureg_src l_addr[2], r_addr[2]; |
||
437 | |||
438 | struct ureg_dst l[2], r[2]; |
||
439 | |||
440 | --first_input; |
||
441 | |||
442 | l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); |
||
443 | l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); |
||
444 | |||
445 | r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); |
||
446 | r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); |
||
447 | |||
448 | l[0] = ureg_DECL_temporary(shader); |
||
449 | l[1] = ureg_DECL_temporary(shader); |
||
450 | r[0] = ureg_DECL_temporary(shader); |
||
451 | r[1] = ureg_DECL_temporary(shader); |
||
452 | |||
453 | fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false); |
||
454 | fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true); |
||
455 | |||
456 | matrix_mul(shader, fragment, l, r); |
||
457 | |||
458 | ureg_release_temporary(shader, l[0]); |
||
459 | ureg_release_temporary(shader, l[1]); |
||
460 | ureg_release_temporary(shader, r[0]); |
||
461 | ureg_release_temporary(shader, r[1]); |
||
462 | } |
||
463 | |||
464 | static bool |
||
465 | init_shaders(struct vl_idct *idct) |
||
466 | { |
||
467 | idct->vs_mismatch = create_mismatch_vert_shader(idct); |
||
468 | if (!idct->vs_mismatch) |
||
469 | goto error_vs_mismatch; |
||
470 | |||
471 | idct->fs_mismatch = create_mismatch_frag_shader(idct); |
||
472 | if (!idct->fs_mismatch) |
||
473 | goto error_fs_mismatch; |
||
474 | |||
475 | idct->vs = create_stage1_vert_shader(idct); |
||
476 | if (!idct->vs) |
||
477 | goto error_vs; |
||
478 | |||
479 | idct->fs = create_stage1_frag_shader(idct); |
||
480 | if (!idct->fs) |
||
481 | goto error_fs; |
||
482 | |||
483 | return true; |
||
484 | |||
485 | error_fs: |
||
486 | idct->pipe->delete_vs_state(idct->pipe, idct->vs); |
||
487 | |||
488 | error_vs: |
||
489 | idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); |
||
490 | |||
491 | error_fs_mismatch: |
||
492 | idct->pipe->delete_vs_state(idct->pipe, idct->fs); |
||
493 | |||
494 | error_vs_mismatch: |
||
495 | return false; |
||
496 | } |
||
497 | |||
498 | static void |
||
499 | cleanup_shaders(struct vl_idct *idct) |
||
500 | { |
||
501 | idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); |
||
502 | idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch); |
||
503 | idct->pipe->delete_vs_state(idct->pipe, idct->vs); |
||
504 | idct->pipe->delete_fs_state(idct->pipe, idct->fs); |
||
505 | } |
||
506 | |||
507 | static bool |
||
508 | init_state(struct vl_idct *idct) |
||
509 | { |
||
510 | struct pipe_blend_state blend; |
||
511 | struct pipe_rasterizer_state rs_state; |
||
512 | struct pipe_sampler_state sampler; |
||
513 | unsigned i; |
||
514 | |||
515 | assert(idct); |
||
516 | |||
517 | memset(&rs_state, 0, sizeof(rs_state)); |
||
518 | rs_state.point_size = 1; |
||
519 | rs_state.half_pixel_center = true; |
||
520 | rs_state.bottom_edge_rule = true; |
||
521 | rs_state.depth_clip = 1; |
||
522 | idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); |
||
523 | if (!idct->rs_state) |
||
524 | goto error_rs_state; |
||
525 | |||
526 | memset(&blend, 0, sizeof blend); |
||
527 | |||
528 | blend.independent_blend_enable = 0; |
||
529 | blend.rt[0].blend_enable = 0; |
||
530 | blend.rt[0].rgb_func = PIPE_BLEND_ADD; |
||
531 | blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; |
||
532 | blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; |
||
533 | blend.rt[0].alpha_func = PIPE_BLEND_ADD; |
||
534 | blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; |
||
535 | blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; |
||
536 | blend.logicop_enable = 0; |
||
537 | blend.logicop_func = PIPE_LOGICOP_CLEAR; |
||
538 | /* Needed to allow color writes to FB, even if blending disabled */ |
||
539 | blend.rt[0].colormask = PIPE_MASK_RGBA; |
||
540 | blend.dither = 0; |
||
541 | idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend); |
||
542 | if (!idct->blend) |
||
543 | goto error_blend; |
||
544 | |||
545 | for (i = 0; i < 2; ++i) { |
||
546 | memset(&sampler, 0, sizeof(sampler)); |
||
547 | sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; |
||
548 | sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; |
||
549 | sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; |
||
550 | sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; |
||
551 | sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; |
||
552 | sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; |
||
553 | sampler.compare_mode = PIPE_TEX_COMPARE_NONE; |
||
554 | sampler.compare_func = PIPE_FUNC_ALWAYS; |
||
555 | sampler.normalized_coords = 1; |
||
556 | idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); |
||
557 | if (!idct->samplers[i]) |
||
558 | goto error_samplers; |
||
559 | } |
||
560 | |||
561 | return true; |
||
562 | |||
563 | error_samplers: |
||
564 | for (i = 0; i < 2; ++i) |
||
565 | if (idct->samplers[i]) |
||
566 | idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); |
||
567 | |||
568 | idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); |
||
569 | |||
570 | error_blend: |
||
571 | idct->pipe->delete_blend_state(idct->pipe, idct->blend); |
||
572 | |||
573 | error_rs_state: |
||
574 | return false; |
||
575 | } |
||
576 | |||
577 | static void |
||
578 | cleanup_state(struct vl_idct *idct) |
||
579 | { |
||
580 | unsigned i; |
||
581 | |||
582 | for (i = 0; i < 2; ++i) |
||
583 | idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); |
||
584 | |||
585 | idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); |
||
586 | idct->pipe->delete_blend_state(idct->pipe, idct->blend); |
||
587 | } |
||
588 | |||
589 | static bool |
||
590 | init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) |
||
591 | { |
||
592 | struct pipe_resource *tex; |
||
593 | struct pipe_surface surf_templ; |
||
594 | |||
595 | assert(idct && buffer); |
||
596 | |||
597 | tex = buffer->sampler_views.individual.source->texture; |
||
598 | |||
599 | buffer->fb_state_mismatch.width = tex->width0; |
||
600 | buffer->fb_state_mismatch.height = tex->height0; |
||
601 | buffer->fb_state_mismatch.nr_cbufs = 1; |
||
602 | |||
603 | memset(&surf_templ, 0, sizeof(surf_templ)); |
||
604 | surf_templ.format = tex->format; |
||
605 | surf_templ.u.tex.first_layer = 0; |
||
606 | surf_templ.u.tex.last_layer = 0; |
||
607 | buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ); |
||
608 | |||
609 | buffer->viewport_mismatch.scale[0] = tex->width0; |
||
610 | buffer->viewport_mismatch.scale[1] = tex->height0; |
||
611 | buffer->viewport_mismatch.scale[2] = 1; |
||
612 | buffer->viewport_mismatch.scale[3] = 1; |
||
613 | |||
614 | return true; |
||
615 | } |
||
616 | |||
617 | static void |
||
618 | cleanup_source(struct vl_idct_buffer *buffer) |
||
619 | { |
||
620 | assert(buffer); |
||
621 | |||
622 | pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL); |
||
623 | |||
624 | pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL); |
||
625 | } |
||
626 | |||
627 | static bool |
||
628 | init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) |
||
629 | { |
||
630 | struct pipe_resource *tex; |
||
631 | struct pipe_surface surf_templ; |
||
632 | unsigned i; |
||
633 | |||
634 | assert(idct && buffer); |
||
635 | |||
636 | tex = buffer->sampler_views.individual.intermediate->texture; |
||
637 | |||
638 | buffer->fb_state.width = tex->width0; |
||
639 | buffer->fb_state.height = tex->height0; |
||
640 | buffer->fb_state.nr_cbufs = idct->nr_of_render_targets; |
||
641 | for(i = 0; i < idct->nr_of_render_targets; ++i) { |
||
642 | memset(&surf_templ, 0, sizeof(surf_templ)); |
||
643 | surf_templ.format = tex->format; |
||
644 | surf_templ.u.tex.first_layer = i; |
||
645 | surf_templ.u.tex.last_layer = i; |
||
646 | buffer->fb_state.cbufs[i] = idct->pipe->create_surface( |
||
647 | idct->pipe, tex, &surf_templ); |
||
648 | |||
649 | if (!buffer->fb_state.cbufs[i]) |
||
650 | goto error_surfaces; |
||
651 | } |
||
652 | |||
653 | buffer->viewport.scale[0] = tex->width0; |
||
654 | buffer->viewport.scale[1] = tex->height0; |
||
655 | buffer->viewport.scale[2] = 1; |
||
656 | buffer->viewport.scale[3] = 1; |
||
657 | |||
658 | return true; |
||
659 | |||
660 | error_surfaces: |
||
661 | for(i = 0; i < idct->nr_of_render_targets; ++i) |
||
662 | pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); |
||
663 | |||
664 | return false; |
||
665 | } |
||
666 | |||
667 | static void |
||
668 | cleanup_intermediate(struct vl_idct_buffer *buffer) |
||
669 | { |
||
670 | unsigned i; |
||
671 | |||
672 | assert(buffer); |
||
673 | |||
674 | for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) |
||
675 | pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); |
||
676 | |||
677 | pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); |
||
678 | } |
||
679 | |||
680 | struct pipe_sampler_view * |
||
681 | vl_idct_upload_matrix(struct pipe_context *pipe, float scale) |
||
682 | { |
||
683 | struct pipe_resource tex_templ, *matrix; |
||
684 | struct pipe_sampler_view sv_templ, *sv; |
||
685 | struct pipe_transfer *buf_transfer; |
||
686 | unsigned i, j, pitch; |
||
687 | float *f; |
||
688 | |||
689 | struct pipe_box rect = |
||
690 | { |
||
691 | 0, 0, 0, |
||
692 | VL_BLOCK_WIDTH / 4, |
||
693 | VL_BLOCK_HEIGHT, |
||
694 | 1 |
||
695 | }; |
||
696 | |||
697 | assert(pipe); |
||
698 | |||
699 | memset(&tex_templ, 0, sizeof(tex_templ)); |
||
700 | tex_templ.target = PIPE_TEXTURE_2D; |
||
701 | tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT; |
||
702 | tex_templ.last_level = 0; |
||
703 | tex_templ.width0 = 2; |
||
704 | tex_templ.height0 = 8; |
||
705 | tex_templ.depth0 = 1; |
||
706 | tex_templ.array_size = 1; |
||
707 | tex_templ.usage = PIPE_USAGE_IMMUTABLE; |
||
708 | tex_templ.bind = PIPE_BIND_SAMPLER_VIEW; |
||
709 | tex_templ.flags = 0; |
||
710 | |||
711 | matrix = pipe->screen->resource_create(pipe->screen, &tex_templ); |
||
712 | if (!matrix) |
||
713 | goto error_matrix; |
||
714 | |||
715 | f = pipe->transfer_map(pipe, matrix, 0, |
||
716 | PIPE_TRANSFER_WRITE | |
||
717 | PIPE_TRANSFER_DISCARD_RANGE, |
||
718 | &rect, &buf_transfer); |
||
719 | if (!f) |
||
720 | goto error_map; |
||
721 | |||
722 | pitch = buf_transfer->stride / sizeof(float); |
||
723 | |||
724 | for(i = 0; i < VL_BLOCK_HEIGHT; ++i) |
||
725 | for(j = 0; j < VL_BLOCK_WIDTH; ++j) |
||
726 | // transpose and scale |
||
727 | f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale; |
||
728 | |||
729 | pipe->transfer_unmap(pipe, buf_transfer); |
||
730 | |||
731 | memset(&sv_templ, 0, sizeof(sv_templ)); |
||
732 | u_sampler_view_default_template(&sv_templ, matrix, matrix->format); |
||
733 | sv = pipe->create_sampler_view(pipe, matrix, &sv_templ); |
||
734 | pipe_resource_reference(&matrix, NULL); |
||
735 | if (!sv) |
||
736 | goto error_map; |
||
737 | |||
738 | return sv; |
||
739 | |||
740 | error_map: |
||
741 | pipe_resource_reference(&matrix, NULL); |
||
742 | |||
743 | error_matrix: |
||
744 | return NULL; |
||
745 | } |
||
746 | |||
747 | bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, |
||
748 | unsigned buffer_width, unsigned buffer_height, |
||
749 | unsigned nr_of_render_targets, |
||
750 | struct pipe_sampler_view *matrix, |
||
751 | struct pipe_sampler_view *transpose) |
||
752 | { |
||
753 | assert(idct && pipe); |
||
754 | assert(matrix && transpose); |
||
755 | |||
756 | idct->pipe = pipe; |
||
757 | idct->buffer_width = buffer_width; |
||
758 | idct->buffer_height = buffer_height; |
||
759 | idct->nr_of_render_targets = nr_of_render_targets; |
||
760 | |||
761 | pipe_sampler_view_reference(&idct->matrix, matrix); |
||
762 | pipe_sampler_view_reference(&idct->transpose, transpose); |
||
763 | |||
764 | if(!init_shaders(idct)) |
||
765 | return false; |
||
766 | |||
767 | if(!init_state(idct)) { |
||
768 | cleanup_shaders(idct); |
||
769 | return false; |
||
770 | } |
||
771 | |||
772 | return true; |
||
773 | } |
||
774 | |||
775 | void |
||
776 | vl_idct_cleanup(struct vl_idct *idct) |
||
777 | { |
||
778 | cleanup_shaders(idct); |
||
779 | cleanup_state(idct); |
||
780 | |||
781 | pipe_sampler_view_reference(&idct->matrix, NULL); |
||
782 | pipe_sampler_view_reference(&idct->transpose, NULL); |
||
783 | } |
||
784 | |||
785 | bool |
||
786 | vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, |
||
787 | struct pipe_sampler_view *source, |
||
788 | struct pipe_sampler_view *intermediate) |
||
789 | { |
||
790 | assert(buffer && idct); |
||
791 | assert(source && intermediate); |
||
792 | |||
793 | memset(buffer, 0, sizeof(struct vl_idct_buffer)); |
||
794 | |||
795 | pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); |
||
796 | pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); |
||
797 | pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose); |
||
798 | pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate); |
||
799 | |||
800 | if (!init_source(idct, buffer)) |
||
801 | return false; |
||
802 | |||
803 | if (!init_intermediate(idct, buffer)) |
||
804 | return false; |
||
805 | |||
806 | return true; |
||
807 | } |
||
808 | |||
809 | void |
||
810 | vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer) |
||
811 | { |
||
812 | assert(buffer); |
||
813 | |||
814 | cleanup_source(buffer); |
||
815 | cleanup_intermediate(buffer); |
||
816 | |||
817 | pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL); |
||
818 | pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL); |
||
819 | } |
||
820 | |||
821 | void |
||
822 | vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances) |
||
823 | { |
||
824 | assert(buffer); |
||
825 | |||
826 | idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); |
||
827 | idct->pipe->bind_blend_state(idct->pipe, idct->blend); |
||
828 | idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); |
||
829 | idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); |
||
830 | |||
831 | /* mismatch control */ |
||
832 | idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch); |
||
833 | idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch); |
||
834 | idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch); |
||
835 | idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch); |
||
836 | util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances); |
||
837 | |||
838 | /* first stage */ |
||
839 | idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state); |
||
840 | idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport); |
||
841 | idct->pipe->bind_vs_state(idct->pipe, idct->vs); |
||
842 | idct->pipe->bind_fs_state(idct->pipe, idct->fs); |
||
843 | util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); |
||
844 | } |
||
845 | |||
846 | void |
||
847 | vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer) |
||
848 | { |
||
849 | assert(buffer); |
||
850 | |||
851 | /* second stage */ |
||
852 | idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); |
||
853 | idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); |
||
854 | idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); |
||
855 | }>>>>>>>>>><>>>>>>>>><>><>><>><>>>>>>> |
||
856 |