Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3770 Serge 1
/**************************************************************************
2
 *
3
 * Copyright 2010 Christian König
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27
 
28
#include <assert.h>
29
 
30
#include "pipe/p_context.h"
31
#include "pipe/p_screen.h"
32
 
33
#include "util/u_draw.h"
34
#include "util/u_sampler.h"
35
#include "util/u_memory.h"
36
 
37
#include "tgsi/tgsi_ureg.h"
38
 
39
#include "vl_defines.h"
40
#include "vl_types.h"
41
#include "vl_vertex_buffers.h"
42
#include "vl_idct.h"
43
 
44
/**
 * Semantic indices of the vertex shader outputs declared in this file.
 *
 * VS_O_VPOS is used as the index of the POSITION output, the remaining
 * values as indices of GENERIC outputs. VS_O_VPOS and VS_O_L_ADDR0
 * intentionally share the value 0.
 */
enum VS_OUTPUT
{
   VS_O_VPOS    = 0,
   VS_O_L_ADDR0 = 0,
   VS_O_L_ADDR1 = 1,
   VS_O_R_ADDR0 = 2,
   VS_O_R_ADDR1 = 3
};
52
 
53
/**
 * 8x8 DCT basis matrix. Every entry is the raw bit pattern of a
 * single-precision float, stored as uint32_t. The table corresponds to:
 *
 *    for (i = 0; i < 8; ++i)
 *       for (j = 0; j < 8; ++j)
 *          if (i == 0)
 *             const_matrix[i][j] = 1.0f / sqrtf(8.0f);
 *          else
 *             const_matrix[i][j] = sqrtf(2.0f / 8.0f) *
 *                                  cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
 */
static const uint32_t const_matrix[8][8] = {
   /* i = 0 */
   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3,
     0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
   /* i = 1 */
   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4,
     0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
   /* i = 2 */
   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e,
     0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
   /* i = 3 */
   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd,
     0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
   /* i = 4 */
   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1,
     0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
   /* i = 5 */
   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32,
     0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
   /* i = 6 */
   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07,
     0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
   /* i = 7 */
   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0,
     0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
};
70
 
71
static void
72
calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
73
          struct ureg_src tc, struct ureg_src start, bool right_side,
74
          bool transposed, float size)
75
{
76
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
77
   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
78
 
79
   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
80
   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
81
 
82
   /*
83
    * addr[0..1].(start) = right_side ? start.x : tc.x
84
    * addr[0..1].(tc) = right_side ? tc.y : start.y
85
    * addr[0..1].z = tc.z
86
    * addr[1].(start) += 1.0f / scale
87
    */
88
   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
89
   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
90
 
91
   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
92
   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
93
}
94
 
95
static void
96
increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
97
               struct ureg_src saddr[2], bool right_side, bool transposed,
98
               int pos, float size)
99
{
100
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
101
   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
102
 
103
   /*
104
    * daddr[0..1].(start) = saddr[0..1].(start)
105
    * daddr[0..1].(tc) = saddr[0..1].(tc)
106
    */
107
 
108
   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
109
   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
110
   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
111
   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
112
}
113
 
114
static void
115
fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
116
           struct ureg_src sampler, bool resource3d)
117
{
118
   ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
119
   ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
120
}
121
 
122
static void
123
matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
124
{
125
   struct ureg_dst tmp;
126
 
127
   tmp = ureg_DECL_temporary(shader);
128
 
129
   /*
130
    * tmp.xy = dot4(m[0][0..1], m[1][0..1])
131
    * dst = tmp.x + tmp.y
132
    */
133
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
134
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
135
   ureg_ADD(shader, dst,
136
      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
137
      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
138
 
139
   ureg_release_temporary(shader, tmp);
140
}
141
 
142
static void *
143
create_mismatch_vert_shader(struct vl_idct *idct)
144
{
145
   struct ureg_program *shader;
146
   struct ureg_src vpos;
147
   struct ureg_src scale;
148
   struct ureg_dst t_tex;
149
   struct ureg_dst o_vpos, o_addr[2];
150
 
151
   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
152
   if (!shader)
153
      return NULL;
154
 
155
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
156
 
157
   t_tex = ureg_DECL_temporary(shader);
158
 
159
   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
160
 
161
   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
162
   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
163
 
164
   /*
165
    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
166
    *
167
    * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
168
    * o_vpos.xy = t_vpos * scale
169
    *
170
    * o_addr = calc_addr(...)
171
    *
172
    */
173
 
174
   scale = ureg_imm2f(shader,
175
      (float)VL_BLOCK_WIDTH / idct->buffer_width,
176
      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
177
 
178
   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
179
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
180
 
181
   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
182
   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
183
 
184
   ureg_release_temporary(shader, t_tex);
185
 
186
   ureg_END(shader);
187
 
188
   return ureg_create_shader_and_destroy(shader, idct->pipe);
189
}
190
 
191
static void *
192
create_mismatch_frag_shader(struct vl_idct *idct)
193
{
194
   struct ureg_program *shader;
195
 
196
   struct ureg_src addr[2];
197
 
198
   struct ureg_dst m[8][2];
199
   struct ureg_dst fragment;
200
 
201
   unsigned i;
202
 
203
   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
204
   if (!shader)
205
      return NULL;
206
 
207
   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
208
   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
209
 
210
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
211
 
212
   for (i = 0; i < 8; ++i) {
213
      m[i][0] = ureg_DECL_temporary(shader);
214
      m[i][1] = ureg_DECL_temporary(shader);
215
   }
216
 
217
   for (i = 0; i < 8; ++i) {
218
      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
219
   }
220
 
221
   for (i = 0; i < 8; ++i) {
222
      struct ureg_src s_addr[2];
223
      s_addr[0] = ureg_src(m[i][0]);
224
      s_addr[1] = ureg_src(m[i][1]);
225
      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
226
   }
227
 
228
   for (i = 1; i < 8; ++i) {
229
      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
230
      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
231
   }
232
 
233
   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
234
   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
235
 
236
   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
237
   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
238
   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
239
 
240
   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
241
            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
242
   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
243
            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
244
 
245
   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
246
   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));
247
 
248
   for (i = 0; i < 8; ++i) {
249
      ureg_release_temporary(shader, m[i][0]);
250
      ureg_release_temporary(shader, m[i][1]);
251
   }
252
 
253
   ureg_END(shader);
254
 
255
   return ureg_create_shader_and_destroy(shader, idct->pipe);
256
}
257
 
258
static void *
259
create_stage1_vert_shader(struct vl_idct *idct)
260
{
261
   struct ureg_program *shader;
262
   struct ureg_src vrect, vpos;
263
   struct ureg_src scale;
264
   struct ureg_dst t_tex, t_start;
265
   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
266
 
267
   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
268
   if (!shader)
269
      return NULL;
270
 
271
   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
272
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
273
 
274
   t_tex = ureg_DECL_temporary(shader);
275
   t_start = ureg_DECL_temporary(shader);
276
 
277
   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
278
 
279
   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
280
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
281
 
282
   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
283
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);
284
 
285
   /*
286
    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
287
    *
288
    * t_vpos = vpos + vrect
289
    * o_vpos.xy = t_vpos * scale
290
    * o_vpos.zw = vpos
291
    *
292
    * o_l_addr = calc_addr(...)
293
    * o_r_addr = calc_addr(...)
294
    *
295
    */
296
 
297
   scale = ureg_imm2f(shader,
298
      (float)VL_BLOCK_WIDTH / idct->buffer_width,
299
      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
300
 
301
   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
302
   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
303
 
304
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
305
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
306
 
307
   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
308
 
309
   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
310
   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);
311
 
312
   ureg_release_temporary(shader, t_tex);
313
   ureg_release_temporary(shader, t_start);
314
 
315
   ureg_END(shader);
316
 
317
   return ureg_create_shader_and_destroy(shader, idct->pipe);
318
}
319
 
320
static void *
321
create_stage1_frag_shader(struct vl_idct *idct)
322
{
323
   struct ureg_program *shader;
324
 
325
   struct ureg_src l_addr[2], r_addr[2];
326
 
327
   struct ureg_dst l[4][2], r[2];
328
   struct ureg_dst *fragment;
329
 
330
   int i, j;
331
 
332
   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
333
   if (!shader)
334
      return NULL;
335
 
336
   fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));
337
 
338
   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
339
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
340
 
341
   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
342
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
343
 
344
   for (i = 0; i < idct->nr_of_render_targets; ++i)
345
       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
346
 
347
   for (i = 0; i < 4; ++i) {
348
      l[i][0] = ureg_DECL_temporary(shader);
349
      l[i][1] = ureg_DECL_temporary(shader);
350
   }
351
 
352
   r[0] = ureg_DECL_temporary(shader);
353
   r[1] = ureg_DECL_temporary(shader);
354
 
355
   for (i = 0; i < 4; ++i) {
356
      increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
357
   }
358
 
359
   for (i = 0; i < 4; ++i) {
360
      struct ureg_src s_addr[2];
361
      s_addr[0] = ureg_src(l[i][0]);
362
      s_addr[1] = ureg_src(l[i][1]);
363
      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
364
   }
365
 
366
   for (i = 0; i < idct->nr_of_render_targets; ++i) {
367
      struct ureg_src s_addr[2];
368
 
369
      increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);
370
 
371
      s_addr[0] = ureg_src(r[0]);
372
      s_addr[1] = ureg_src(r[1]);
373
      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
374
 
375
      for (j = 0; j < 4; ++j) {
376
         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
377
      }
378
   }
379
 
380
   for (i = 0; i < 4; ++i) {
381
      ureg_release_temporary(shader, l[i][0]);
382
      ureg_release_temporary(shader, l[i][1]);
383
   }
384
   ureg_release_temporary(shader, r[0]);
385
   ureg_release_temporary(shader, r[1]);
386
 
387
   ureg_END(shader);
388
 
389
   FREE(fragment);
390
 
391
   return ureg_create_shader_and_destroy(shader, idct->pipe);
392
}
393
 
394
void
395
vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
396
                           unsigned first_output, struct ureg_dst tex)
397
{
398
   struct ureg_src vrect, vpos;
399
   struct ureg_src scale;
400
   struct ureg_dst t_start;
401
   struct ureg_dst o_l_addr[2], o_r_addr[2];
402
 
403
   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
404
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
405
 
406
   t_start = ureg_DECL_temporary(shader);
407
 
408
   --first_output;
409
 
410
   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
411
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);
412
 
413
   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
414
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);
415
 
416
   scale = ureg_imm2f(shader,
417
      (float)VL_BLOCK_WIDTH / idct->buffer_width,
418
      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
419
 
420
   ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
421
      ureg_scalar(vrect, TGSI_SWIZZLE_X),
422
      ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
423
   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
424
 
425
   calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
426
   calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
427
 
428
   ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
429
   ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
430
}
431
 
432
void
433
vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
434
                           unsigned first_input, struct ureg_dst fragment)
435
{
436
   struct ureg_src l_addr[2], r_addr[2];
437
 
438
   struct ureg_dst l[2], r[2];
439
 
440
   --first_input;
441
 
442
   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
443
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
444
 
445
   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
446
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
447
 
448
   l[0] = ureg_DECL_temporary(shader);
449
   l[1] = ureg_DECL_temporary(shader);
450
   r[0] = ureg_DECL_temporary(shader);
451
   r[1] = ureg_DECL_temporary(shader);
452
 
453
   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
454
   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
455
 
456
   matrix_mul(shader, fragment, l, r);
457
 
458
   ureg_release_temporary(shader, l[0]);
459
   ureg_release_temporary(shader, l[1]);
460
   ureg_release_temporary(shader, r[0]);
461
   ureg_release_temporary(shader, r[1]);
462
}
463
 
464
static bool
465
init_shaders(struct vl_idct *idct)
466
{
467
   idct->vs_mismatch = create_mismatch_vert_shader(idct);
468
   if (!idct->vs_mismatch)
469
      goto error_vs_mismatch;
470
 
471
   idct->fs_mismatch = create_mismatch_frag_shader(idct);
472
   if (!idct->fs_mismatch)
473
      goto error_fs_mismatch;
474
 
475
   idct->vs = create_stage1_vert_shader(idct);
476
   if (!idct->vs)
477
      goto error_vs;
478
 
479
   idct->fs = create_stage1_frag_shader(idct);
480
   if (!idct->fs)
481
      goto error_fs;
482
 
483
   return true;
484
 
485
error_fs:
486
   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
487
 
488
error_vs:
489
   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
490
 
491
error_fs_mismatch:
492
   idct->pipe->delete_vs_state(idct->pipe, idct->fs);
493
 
494
error_vs_mismatch:
495
   return false;
496
}
497
 
498
static void
499
cleanup_shaders(struct vl_idct *idct)
500
{
501
   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
502
   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
503
   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
504
   idct->pipe->delete_fs_state(idct->pipe, idct->fs);
505
}
506
 
507
static bool
508
init_state(struct vl_idct *idct)
509
{
510
   struct pipe_blend_state blend;
511
   struct pipe_rasterizer_state rs_state;
512
   struct pipe_sampler_state sampler;
513
   unsigned i;
514
 
515
   assert(idct);
516
 
517
   memset(&rs_state, 0, sizeof(rs_state));
518
   rs_state.point_size = 1;
519
   rs_state.half_pixel_center = true;
520
   rs_state.bottom_edge_rule = true;
521
   rs_state.depth_clip = 1;
522
   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
523
   if (!idct->rs_state)
524
      goto error_rs_state;
525
 
526
   memset(&blend, 0, sizeof blend);
527
 
528
   blend.independent_blend_enable = 0;
529
   blend.rt[0].blend_enable = 0;
530
   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
531
   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
532
   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
533
   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
534
   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
535
   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
536
   blend.logicop_enable = 0;
537
   blend.logicop_func = PIPE_LOGICOP_CLEAR;
538
   /* Needed to allow color writes to FB, even if blending disabled */
539
   blend.rt[0].colormask = PIPE_MASK_RGBA;
540
   blend.dither = 0;
541
   idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
542
   if (!idct->blend)
543
      goto error_blend;
544
 
545
   for (i = 0; i < 2; ++i) {
546
      memset(&sampler, 0, sizeof(sampler));
547
      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
548
      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
549
      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
550
      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
551
      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
552
      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
553
      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
554
      sampler.compare_func = PIPE_FUNC_ALWAYS;
555
      sampler.normalized_coords = 1;
556
      idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
557
      if (!idct->samplers[i])
558
         goto error_samplers;
559
   }
560
 
561
   return true;
562
 
563
error_samplers:
564
   for (i = 0; i < 2; ++i)
565
      if (idct->samplers[i])
566
         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
567
 
568
   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
569
 
570
error_blend:
571
   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
572
 
573
error_rs_state:
574
   return false;
575
}
576
 
577
static void
578
cleanup_state(struct vl_idct *idct)
579
{
580
   unsigned i;
581
 
582
   for (i = 0; i < 2; ++i)
583
      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
584
 
585
   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
586
   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
587
}
588
 
589
static bool
590
init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
591
{
592
   struct pipe_resource *tex;
593
   struct pipe_surface surf_templ;
594
 
595
   assert(idct && buffer);
596
 
597
   tex = buffer->sampler_views.individual.source->texture;
598
 
599
   buffer->fb_state_mismatch.width = tex->width0;
600
   buffer->fb_state_mismatch.height = tex->height0;
601
   buffer->fb_state_mismatch.nr_cbufs = 1;
602
 
603
   memset(&surf_templ, 0, sizeof(surf_templ));
604
   surf_templ.format = tex->format;
605
   surf_templ.u.tex.first_layer = 0;
606
   surf_templ.u.tex.last_layer = 0;
607
   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
608
 
609
   buffer->viewport_mismatch.scale[0] = tex->width0;
610
   buffer->viewport_mismatch.scale[1] = tex->height0;
611
   buffer->viewport_mismatch.scale[2] = 1;
612
   buffer->viewport_mismatch.scale[3] = 1;
613
 
614
   return true;
615
}
616
 
617
static void
618
cleanup_source(struct vl_idct_buffer *buffer)
619
{
620
   assert(buffer);
621
 
622
   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
623
 
624
   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
625
}
626
 
627
static bool
628
init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
629
{
630
   struct pipe_resource *tex;
631
   struct pipe_surface surf_templ;
632
   unsigned i;
633
 
634
   assert(idct && buffer);
635
 
636
   tex = buffer->sampler_views.individual.intermediate->texture;
637
 
638
   buffer->fb_state.width = tex->width0;
639
   buffer->fb_state.height = tex->height0;
640
   buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
641
   for(i = 0; i < idct->nr_of_render_targets; ++i) {
642
      memset(&surf_templ, 0, sizeof(surf_templ));
643
      surf_templ.format = tex->format;
644
      surf_templ.u.tex.first_layer = i;
645
      surf_templ.u.tex.last_layer = i;
646
      buffer->fb_state.cbufs[i] = idct->pipe->create_surface(
647
         idct->pipe, tex, &surf_templ);
648
 
649
      if (!buffer->fb_state.cbufs[i])
650
         goto error_surfaces;
651
   }
652
 
653
   buffer->viewport.scale[0] = tex->width0;
654
   buffer->viewport.scale[1] = tex->height0;
655
   buffer->viewport.scale[2] = 1;
656
   buffer->viewport.scale[3] = 1;
657
 
658
   return true;
659
 
660
error_surfaces:
661
   for(i = 0; i < idct->nr_of_render_targets; ++i)
662
      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
663
 
664
   return false;
665
}
666
 
667
static void
668
cleanup_intermediate(struct vl_idct_buffer *buffer)
669
{
670
   unsigned i;
671
 
672
   assert(buffer);
673
 
674
   for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
675
      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
676
 
677
   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
678
}
679
 
680
struct pipe_sampler_view *
681
vl_idct_upload_matrix(struct pipe_context *pipe, float scale)
682
{
683
   struct pipe_resource tex_templ, *matrix;
684
   struct pipe_sampler_view sv_templ, *sv;
685
   struct pipe_transfer *buf_transfer;
686
   unsigned i, j, pitch;
687
   float *f;
688
 
689
   struct pipe_box rect =
690
   {
691
      0, 0, 0,
692
      VL_BLOCK_WIDTH / 4,
693
      VL_BLOCK_HEIGHT,
694
      1
695
   };
696
 
697
   assert(pipe);
698
 
699
   memset(&tex_templ, 0, sizeof(tex_templ));
700
   tex_templ.target = PIPE_TEXTURE_2D;
701
   tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
702
   tex_templ.last_level = 0;
703
   tex_templ.width0 = 2;
704
   tex_templ.height0 = 8;
705
   tex_templ.depth0 = 1;
706
   tex_templ.array_size = 1;
707
   tex_templ.usage = PIPE_USAGE_IMMUTABLE;
708
   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
709
   tex_templ.flags = 0;
710
 
711
   matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
712
   if (!matrix)
713
      goto error_matrix;
714
 
715
   f = pipe->transfer_map(pipe, matrix, 0,
716
                                     PIPE_TRANSFER_WRITE |
717
                                     PIPE_TRANSFER_DISCARD_RANGE,
718
                                     &rect, &buf_transfer);
719
   if (!f)
720
      goto error_map;
721
 
722
   pitch = buf_transfer->stride / sizeof(float);
723
 
724
   for(i = 0; i < VL_BLOCK_HEIGHT; ++i)
725
      for(j = 0; j < VL_BLOCK_WIDTH; ++j)
726
         // transpose and scale
727
         f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;
728
 
729
   pipe->transfer_unmap(pipe, buf_transfer);
730
 
731
   memset(&sv_templ, 0, sizeof(sv_templ));
732
   u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
733
   sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
734
   pipe_resource_reference(&matrix, NULL);
735
   if (!sv)
736
      goto error_map;
737
 
738
   return sv;
739
 
740
error_map:
741
   pipe_resource_reference(&matrix, NULL);
742
 
743
error_matrix:
744
   return NULL;
745
}
746
 
747
bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
748
                  unsigned buffer_width, unsigned buffer_height,
749
                  unsigned nr_of_render_targets,
750
                  struct pipe_sampler_view *matrix,
751
                  struct pipe_sampler_view *transpose)
752
{
753
   assert(idct && pipe);
754
   assert(matrix && transpose);
755
 
756
   idct->pipe = pipe;
757
   idct->buffer_width = buffer_width;
758
   idct->buffer_height = buffer_height;
759
   idct->nr_of_render_targets = nr_of_render_targets;
760
 
761
   pipe_sampler_view_reference(&idct->matrix, matrix);
762
   pipe_sampler_view_reference(&idct->transpose, transpose);
763
 
764
   if(!init_shaders(idct))
765
      return false;
766
 
767
   if(!init_state(idct)) {
768
      cleanup_shaders(idct);
769
      return false;
770
   }
771
 
772
   return true;
773
}
774
 
775
void
776
vl_idct_cleanup(struct vl_idct *idct)
777
{
778
   cleanup_shaders(idct);
779
   cleanup_state(idct);
780
 
781
   pipe_sampler_view_reference(&idct->matrix, NULL);
782
   pipe_sampler_view_reference(&idct->transpose, NULL);
783
}
784
 
785
bool
786
vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
787
                    struct pipe_sampler_view *source,
788
                    struct pipe_sampler_view *intermediate)
789
{
790
   assert(buffer && idct);
791
   assert(source && intermediate);
792
 
793
   memset(buffer, 0, sizeof(struct vl_idct_buffer));
794
 
795
   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
796
   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
797
   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
798
   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
799
 
800
   if (!init_source(idct, buffer))
801
      return false;
802
 
803
   if (!init_intermediate(idct, buffer))
804
      return false;
805
 
806
   return true;
807
}
808
 
809
void
810
vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
811
{
812
   assert(buffer);
813
 
814
   cleanup_source(buffer);
815
   cleanup_intermediate(buffer);
816
 
817
   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
818
   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
819
}
820
 
821
void
822
vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
823
{
824
   assert(buffer);
825
 
826
   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
827
   idct->pipe->bind_blend_state(idct->pipe, idct->blend);
828
   idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
829
   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
830
 
831
   /* mismatch control */
832
   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
833
   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch);
834
   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
835
   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
836
   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
837
 
838
   /* first stage */
839
   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
840
   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport);
841
   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
842
   idct->pipe->bind_fs_state(idct->pipe, idct->fs);
843
   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
844
}
845
 
846
void
847
vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
848
{
849
   assert(buffer);
850
 
851
   /* second stage */
852
   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
853
   idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
854
   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
855
}
856