Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2010 Christian König
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include <assert.h>
  29.  
  30. #include "pipe/p_context.h"
  31. #include "pipe/p_screen.h"
  32.  
  33. #include "util/u_draw.h"
  34. #include "util/u_sampler.h"
  35. #include "util/u_memory.h"
  36.  
  37. #include "tgsi/tgsi_ureg.h"
  38.  
  39. #include "vl_defines.h"
  40. #include "vl_types.h"
  41. #include "vl_vertex_buffers.h"
  42. #include "vl_idct.h"
  43.  
/**
 * Vertex shader output slot indices.
 *
 * VS_O_VPOS and VS_O_L_ADDR0 intentionally share index 0: VPOS is declared
 * with TGSI_SEMANTIC_POSITION and the ADDR outputs with
 * TGSI_SEMANTIC_GENERIC (see the ureg_DECL_output calls below), so they
 * are counted in separate semantic index spaces.
 */
enum VS_OUTPUT
{
   VS_O_VPOS = 0,
   VS_O_L_ADDR0 = 0,
   VS_O_L_ADDR1,
   VS_O_R_ADDR0,
   VS_O_R_ADDR1
};
  52.  
/**
 * The DCT matrix stored as hex representation of floats. Equal to the following equation:
 * for (i = 0; i < 8; ++i)
 *    for (j = 0; j < 8; ++j)
 *       if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
 *       else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
 *
 * Stored as uint32_t IEEE-754 bit patterns so the table is bit-exact on
 * every compiler/platform; vl_idct_upload_matrix() reinterprets the bits
 * as floats when uploading.
 */
static const uint32_t const_matrix[8][8] = {
   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
};
  70.  
  71. static void
  72. calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
  73.           struct ureg_src tc, struct ureg_src start, bool right_side,
  74.           bool transposed, float size)
  75. {
  76.    unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
  77.    unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
  78.  
  79.    unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
  80.    unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
  81.  
  82.    /*
  83.     * addr[0..1].(start) = right_side ? start.x : tc.x
  84.     * addr[0..1].(tc) = right_side ? tc.y : start.y
  85.     * addr[0..1].z = tc.z
  86.     * addr[1].(start) += 1.0f / scale
  87.     */
  88.    ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
  89.    ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
  90.  
  91.    ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
  92.    ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
  93. }
  94.  
/**
 * Emit instructions deriving daddr[0..1] from saddr[0..1]: the start
 * component is copied unchanged while the texcoord component is advanced
 * by 'pos' texels (pos / size in normalized coordinates).
 */
static void
increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
               struct ureg_src saddr[2], bool right_side, bool transposed,
               int pos, float size)
{
   /* same component selection rule as calc_addr() */
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;

   /*
    * daddr[0..1].(start) = saddr[0..1].(start)
    * daddr[0..1].(tc) = saddr[0..1].(tc) + pos / size
    */

   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
}
  113.  
  114. static void
  115. fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
  116.            struct ureg_src sampler, bool resource3d)
  117. {
  118.    ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
  119.    ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
  120. }
  121.  
  122. static void
  123. matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
  124. {
  125.    struct ureg_dst tmp;
  126.  
  127.    tmp = ureg_DECL_temporary(shader);
  128.  
  129.    /*
  130.     * tmp.xy = dot4(m[0][0..1], m[1][0..1])
  131.     * dst = tmp.x + tmp.y
  132.     */
  133.    ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
  134.    ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
  135.    ureg_ADD(shader, dst,
  136.       ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
  137.       ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
  138.  
  139.    ureg_release_temporary(shader, tmp);
  140. }
  141.  
/**
 * Create the vertex shader for the mismatch-control pass.
 *
 * Positions one point per block and emits a pair of texture addresses
 * (o_addr) pointing at the block's source coefficients.
 *
 * @return compiled VS CSO, or NULL on failure
 */
static void *
create_mismatch_vert_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;
   struct ureg_src vpos;
   struct ureg_src scale;
   struct ureg_dst t_tex;
   struct ureg_dst o_vpos, o_addr[2];

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_tex = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   /*
    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
    * o_vpos.xy = t_vpos * scale
    *
    * o_addr = calc_addr(...)
    *
    */

   scale = ureg_imm2f(shader,
      (float)VL_BLOCK_WIDTH / idct->buffer_width,
      (float)VL_BLOCK_HEIGHT / idct->buffer_height);

   /* o_vpos.xy = (vpos + 1) * scale (MAD folds the +1 into the multiply),
    * o_vpos.zw = 1 */
   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   /* block start in normalized texture coordinates; used as both tc and
    * start for the address calculation */
   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);

   ureg_release_temporary(shader, t_tex);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
  190.  
/**
 * Create the fragment shader for the mismatch-control pass.
 *
 * Sums all coefficients of a block and, based on the parity of the
 * fixed-point sum, applies a +/- 1/(1 << 15) correction to the last
 * coefficient before writing it back (NOTE(review): this appears to be
 * MPEG-style IDCT mismatch control / oddification — confirm against the
 * decoder that consumes this output).
 *
 * @return compiled FS CSO, or NULL on failure
 */
static void *
create_mismatch_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src addr[2];

   struct ureg_dst m[8][2];
   struct ureg_dst fragment;

   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   for (i = 0; i < 8; ++i) {
      m[i][0] = ureg_DECL_temporary(shader);
      m[i][1] = ureg_DECL_temporary(shader);
   }

   /* eight address pairs, one block row apart */
   for (i = 0; i < 8; ++i) {
      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
   }

   /* fetch all eight rows of the block */
   for (i = 0; i < 8; ++i) {
      struct ureg_src s_addr[2];
      s_addr[0] = ureg_src(m[i][0]);
      s_addr[1] = ureg_src(m[i][1]);
      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
   }

   /* reduce: accumulate all rows into m[0][0..1] */
   for (i = 1; i < 8; ++i) {
      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
   }

   /* fold the 4 components: |sum| * 2^14 puts the parity-relevant bit
    * into the fractional part (coefficients are presumably 1/2^15
    * fixed point — TODO confirm) */
   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));

   /* same scaling for the last coefficient, kept in .w */
   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
   /* 1.0 where the fractional part is below 0.5 (i.e. "even"), else 0.0 */
   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));

   /* select +/- 1/2^15 correction depending on the parity result */
   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));

   /* write back the last row, with the correction applied to .w only */
   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));

   for (i = 0; i < 8; ++i) {
      ureg_release_temporary(shader, m[i][0]);
      ureg_release_temporary(shader, m[i][1]);
   }

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
  257.  
/**
 * Create the vertex shader for the first IDCT stage.
 *
 * Emits the quad position plus two address pairs: o_l_addr into the
 * source coefficient texture and o_r_addr into the (transposed) matrix
 * texture.
 *
 * @return compiled VS CSO, or NULL on failure
 */
static void *
create_stage1_vert_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;
   struct ureg_src vrect, vpos;
   struct ureg_src scale;
   struct ureg_dst t_tex, t_start;
   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_tex = ureg_DECL_temporary(shader);
   t_start = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);

   /*
    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = vpos + vrect
    * o_vpos.xy = t_vpos * scale
    * o_vpos.zw = vpos
    *
    * o_l_addr = calc_addr(...)
    * o_r_addr = calc_addr(...)
    *
    */

   scale = ureg_imm2f(shader,
      (float)VL_BLOCK_WIDTH / idct->buffer_width,
      (float)VL_BLOCK_HEIGHT / idct->buffer_height);

   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);

   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   /* block start (without the vrect corner offset) in normalized coords */
   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);

   /* left side: source texture; right side: transposed matrix texture */
   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_start);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
  319.  
  320. static void *
  321. create_stage1_frag_shader(struct vl_idct *idct)
  322. {
  323.    struct ureg_program *shader;
  324.  
  325.    struct ureg_src l_addr[2], r_addr[2];
  326.  
  327.    struct ureg_dst l[4][2], r[2];
  328.    struct ureg_dst *fragment;
  329.  
  330.    int i, j;
  331.  
  332.    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
  333.    if (!shader)
  334.       return NULL;
  335.  
  336.    fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));
  337.  
  338.    l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
  339.    l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
  340.  
  341.    r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
  342.    r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
  343.  
  344.    for (i = 0; i < idct->nr_of_render_targets; ++i)
  345.        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
  346.  
  347.    for (i = 0; i < 4; ++i) {
  348.       l[i][0] = ureg_DECL_temporary(shader);
  349.       l[i][1] = ureg_DECL_temporary(shader);
  350.    }
  351.  
  352.    r[0] = ureg_DECL_temporary(shader);
  353.    r[1] = ureg_DECL_temporary(shader);
  354.  
  355.    for (i = 0; i < 4; ++i) {
  356.       increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
  357.    }
  358.  
  359.    for (i = 0; i < 4; ++i) {
  360.       struct ureg_src s_addr[2];
  361.       s_addr[0] = ureg_src(l[i][0]);
  362.       s_addr[1] = ureg_src(l[i][1]);
  363.       fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
  364.    }
  365.  
  366.    for (i = 0; i < idct->nr_of_render_targets; ++i) {
  367.       struct ureg_src s_addr[2];
  368.  
  369.       increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);
  370.  
  371.       s_addr[0] = ureg_src(r[0]);
  372.       s_addr[1] = ureg_src(r[1]);
  373.       fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
  374.  
  375.       for (j = 0; j < 4; ++j) {
  376.          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
  377.       }
  378.    }
  379.  
  380.    for (i = 0; i < 4; ++i) {
  381.       ureg_release_temporary(shader, l[i][0]);
  382.       ureg_release_temporary(shader, l[i][1]);
  383.    }
  384.    ureg_release_temporary(shader, r[0]);
  385.    ureg_release_temporary(shader, r[1]);
  386.  
  387.    ureg_END(shader);
  388.  
  389.    FREE(fragment);
  390.  
  391.    return ureg_create_shader_and_destroy(shader, idct->pipe);
  392. }
  393.  
/**
 * Emit the second-stage IDCT address calculations into an existing
 * vertex shader.
 *
 * @param first_output  first generic output slot reserved by the caller;
 *                      decremented by one before use (NOTE(review):
 *                      callers apparently pass a 1-based slot — confirm
 *                      at the call sites)
 * @param tex           temporary receiving the z coordinate used to
 *                      address the 3D intermediate texture
 */
void
vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
                           unsigned first_output, struct ureg_dst tex)
{
   struct ureg_src vrect, vpos;
   struct ureg_src scale;
   struct ureg_dst t_start;
   struct ureg_dst o_l_addr[2], o_r_addr[2];

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_start = ureg_DECL_temporary(shader);

   --first_output;

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);

   scale = ureg_imm2f(shader,
      (float)VL_BLOCK_WIDTH / idct->buffer_width,
      (float)VL_BLOCK_HEIGHT / idct->buffer_height);

   /* tex.z selects the layer of the intermediate 3D texture */
   ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
      ureg_scalar(vrect, TGSI_SWIZZLE_X),
      ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);

   /* left side: matrix texture; right side: intermediate texture */
   calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
   calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);

   /* propagate the layer coordinate to both right-side addresses */
   ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
   ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
}
  431.  
/**
 * Emit the second-stage IDCT fetch and multiply into an existing
 * fragment shader, writing the result to 'fragment'.
 *
 * @param first_input  first generic input slot reserved by the caller;
 *                     decremented by one before use, matching
 *                     vl_idct_stage2_vert_shader()
 */
void
vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
                           unsigned first_input, struct ureg_dst fragment)
{
   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[2], r[2];

   --first_input;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   l[0] = ureg_DECL_temporary(shader);
   l[1] = ureg_DECL_temporary(shader);
   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   /* sampler 1: matrix (2D); sampler 0: intermediate (3D) */
   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);

   matrix_mul(shader, fragment, l, r);

   ureg_release_temporary(shader, l[0]);
   ureg_release_temporary(shader, l[1]);
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);
}
  463.  
  464. static bool
  465. init_shaders(struct vl_idct *idct)
  466. {
  467.    idct->vs_mismatch = create_mismatch_vert_shader(idct);
  468.    if (!idct->vs_mismatch)
  469.       goto error_vs_mismatch;
  470.  
  471.    idct->fs_mismatch = create_mismatch_frag_shader(idct);
  472.    if (!idct->fs_mismatch)
  473.       goto error_fs_mismatch;
  474.  
  475.    idct->vs = create_stage1_vert_shader(idct);
  476.    if (!idct->vs)
  477.       goto error_vs;
  478.  
  479.    idct->fs = create_stage1_frag_shader(idct);
  480.    if (!idct->fs)
  481.       goto error_fs;
  482.  
  483.    return true;
  484.  
  485. error_fs:
  486.    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
  487.  
  488. error_vs:
  489.    idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
  490.  
  491. error_fs_mismatch:
  492.    idct->pipe->delete_vs_state(idct->pipe, idct->fs);
  493.  
  494. error_vs_mismatch:
  495.    return false;
  496. }
  497.  
/**
 * Destroy all shaders created by init_shaders().
 */
static void
cleanup_shaders(struct vl_idct *idct)
{
   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
   idct->pipe->delete_fs_state(idct->pipe, idct->fs);
}
  506.  
  507. static bool
  508. init_state(struct vl_idct *idct)
  509. {
  510.    struct pipe_blend_state blend;
  511.    struct pipe_rasterizer_state rs_state;
  512.    struct pipe_sampler_state sampler;
  513.    unsigned i;
  514.  
  515.    assert(idct);
  516.  
  517.    memset(&rs_state, 0, sizeof(rs_state));
  518.    rs_state.point_size = 1;
  519.    rs_state.half_pixel_center = true;
  520.    rs_state.bottom_edge_rule = true;
  521.    rs_state.depth_clip = 1;
  522.    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
  523.    if (!idct->rs_state)
  524.       goto error_rs_state;
  525.  
  526.    memset(&blend, 0, sizeof blend);
  527.  
  528.    blend.independent_blend_enable = 0;
  529.    blend.rt[0].blend_enable = 0;
  530.    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
  531.    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
  532.    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
  533.    blend.rt[0].alpha_func = PIPE_BLEND_ADD;
  534.    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
  535.    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
  536.    blend.logicop_enable = 0;
  537.    blend.logicop_func = PIPE_LOGICOP_CLEAR;
  538.    /* Needed to allow color writes to FB, even if blending disabled */
  539.    blend.rt[0].colormask = PIPE_MASK_RGBA;
  540.    blend.dither = 0;
  541.    idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
  542.    if (!idct->blend)
  543.       goto error_blend;
  544.  
  545.    for (i = 0; i < 2; ++i) {
  546.       memset(&sampler, 0, sizeof(sampler));
  547.       sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
  548.       sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
  549.       sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
  550.       sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
  551.       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
  552.       sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
  553.       sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
  554.       sampler.compare_func = PIPE_FUNC_ALWAYS;
  555.       sampler.normalized_coords = 1;
  556.       idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
  557.       if (!idct->samplers[i])
  558.          goto error_samplers;
  559.    }
  560.  
  561.    return true;
  562.  
  563. error_samplers:
  564.    for (i = 0; i < 2; ++i)
  565.       if (idct->samplers[i])
  566.          idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
  567.  
  568.    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
  569.  
  570. error_blend:
  571.    idct->pipe->delete_blend_state(idct->pipe, idct->blend);
  572.  
  573. error_rs_state:
  574.    return false;
  575. }
  576.  
  577. static void
  578. cleanup_state(struct vl_idct *idct)
  579. {
  580.    unsigned i;
  581.  
  582.    for (i = 0; i < 2; ++i)
  583.       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
  584.  
  585.    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
  586.    idct->pipe->delete_blend_state(idct->pipe, idct->blend);
  587. }
  588.  
  589. static bool
  590. init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
  591. {
  592.    struct pipe_resource *tex;
  593.    struct pipe_surface surf_templ;
  594.  
  595.    assert(idct && buffer);
  596.  
  597.    tex = buffer->sampler_views.individual.source->texture;
  598.  
  599.    buffer->fb_state_mismatch.width = tex->width0;
  600.    buffer->fb_state_mismatch.height = tex->height0;
  601.    buffer->fb_state_mismatch.nr_cbufs = 1;
  602.  
  603.    memset(&surf_templ, 0, sizeof(surf_templ));
  604.    surf_templ.format = tex->format;
  605.    surf_templ.u.tex.first_layer = 0;
  606.    surf_templ.u.tex.last_layer = 0;
  607.    buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
  608.  
  609.    buffer->viewport_mismatch.scale[0] = tex->width0;
  610.    buffer->viewport_mismatch.scale[1] = tex->height0;
  611.    buffer->viewport_mismatch.scale[2] = 1;
  612.  
  613.    return true;
  614. }
  615.  
/**
 * Release the mismatch-pass color surface and the source sampler view
 * reference taken in vl_idct_init_buffer().
 */
static void
cleanup_source(struct vl_idct_buffer *buffer)
{
   assert(buffer);

   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
}
  625.  
/**
 * Set up the framebuffer and viewport state for the first IDCT stage,
 * one color surface per layer of the intermediate texture.
 *
 * @return false if any surface could not be created (all surfaces are
 *         released again in that case)
 */
static bool
init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   struct pipe_resource *tex;
   struct pipe_surface surf_templ;
   unsigned i;

   assert(idct && buffer);

   tex = buffer->sampler_views.individual.intermediate->texture;

   buffer->fb_state.width = tex->width0;
   buffer->fb_state.height = tex->height0;
   buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
   for(i = 0; i < idct->nr_of_render_targets; ++i) {
      memset(&surf_templ, 0, sizeof(surf_templ));
      surf_templ.format = tex->format;
      surf_templ.u.tex.first_layer = i;
      surf_templ.u.tex.last_layer = i;
      buffer->fb_state.cbufs[i] = idct->pipe->create_surface(
         idct->pipe, tex, &surf_templ);

      if (!buffer->fb_state.cbufs[i])
         goto error_surfaces;
   }

   buffer->viewport.scale[0] = tex->width0;
   buffer->viewport.scale[1] = tex->height0;
   buffer->viewport.scale[2] = 1;

   return true;

error_surfaces:
   /* pipe_surface_reference is NULL-safe, so releasing slots that were
    * never created is fine */
   for(i = 0; i < idct->nr_of_render_targets; ++i)
      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);

   return false;
}
  664.  
/**
 * Release the stage-1 color surfaces and the intermediate sampler view
 * reference taken in vl_idct_init_buffer().
 */
static void
cleanup_intermediate(struct vl_idct_buffer *buffer)
{
   unsigned i;

   assert(buffer);

   /* NULL-safe; releases every possible slot regardless of how many
    * render targets were actually used */
   for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
}
  677.  
  678. struct pipe_sampler_view *
  679. vl_idct_upload_matrix(struct pipe_context *pipe, float scale)
  680. {
  681.    struct pipe_resource tex_templ, *matrix;
  682.    struct pipe_sampler_view sv_templ, *sv;
  683.    struct pipe_transfer *buf_transfer;
  684.    unsigned i, j, pitch;
  685.    float *f;
  686.  
  687.    struct pipe_box rect =
  688.    {
  689.       0, 0, 0,
  690.       VL_BLOCK_WIDTH / 4,
  691.       VL_BLOCK_HEIGHT,
  692.       1
  693.    };
  694.  
  695.    assert(pipe);
  696.  
  697.    memset(&tex_templ, 0, sizeof(tex_templ));
  698.    tex_templ.target = PIPE_TEXTURE_2D;
  699.    tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
  700.    tex_templ.last_level = 0;
  701.    tex_templ.width0 = 2;
  702.    tex_templ.height0 = 8;
  703.    tex_templ.depth0 = 1;
  704.    tex_templ.array_size = 1;
  705.    tex_templ.usage = PIPE_USAGE_IMMUTABLE;
  706.    tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
  707.    tex_templ.flags = 0;
  708.  
  709.    matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
  710.    if (!matrix)
  711.       goto error_matrix;
  712.  
  713.    f = pipe->transfer_map(pipe, matrix, 0,
  714.                                      PIPE_TRANSFER_WRITE |
  715.                                      PIPE_TRANSFER_DISCARD_RANGE,
  716.                                      &rect, &buf_transfer);
  717.    if (!f)
  718.       goto error_map;
  719.  
  720.    pitch = buf_transfer->stride / sizeof(float);
  721.  
  722.    for(i = 0; i < VL_BLOCK_HEIGHT; ++i)
  723.       for(j = 0; j < VL_BLOCK_WIDTH; ++j)
  724.          // transpose and scale
  725.          f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;
  726.  
  727.    pipe->transfer_unmap(pipe, buf_transfer);
  728.  
  729.    memset(&sv_templ, 0, sizeof(sv_templ));
  730.    u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
  731.    sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
  732.    pipe_resource_reference(&matrix, NULL);
  733.    if (!sv)
  734.       goto error_map;
  735.  
  736.    return sv;
  737.  
  738. error_map:
  739.    pipe_resource_reference(&matrix, NULL);
  740.  
  741. error_matrix:
  742.    return NULL;
  743. }
  744.  
  745. bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
  746.                   unsigned buffer_width, unsigned buffer_height,
  747.                   unsigned nr_of_render_targets,
  748.                   struct pipe_sampler_view *matrix,
  749.                   struct pipe_sampler_view *transpose)
  750. {
  751.    assert(idct && pipe);
  752.    assert(matrix && transpose);
  753.  
  754.    idct->pipe = pipe;
  755.    idct->buffer_width = buffer_width;
  756.    idct->buffer_height = buffer_height;
  757.    idct->nr_of_render_targets = nr_of_render_targets;
  758.  
  759.    pipe_sampler_view_reference(&idct->matrix, matrix);
  760.    pipe_sampler_view_reference(&idct->transpose, transpose);
  761.  
  762.    if(!init_shaders(idct))
  763.       return false;
  764.  
  765.    if(!init_state(idct)) {
  766.       cleanup_shaders(idct);
  767.       return false;
  768.    }
  769.  
  770.    return true;
  771. }
  772.  
/**
 * Destroy an IDCT context: shaders, CSO state and the matrix/transpose
 * sampler view references taken in vl_idct_init().
 */
void
vl_idct_cleanup(struct vl_idct *idct)
{
   cleanup_shaders(idct);
   cleanup_state(idct);

   pipe_sampler_view_reference(&idct->matrix, NULL);
   pipe_sampler_view_reference(&idct->transpose, NULL);
}
  782.  
  783. bool
  784. vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
  785.                     struct pipe_sampler_view *source,
  786.                     struct pipe_sampler_view *intermediate)
  787. {
  788.    assert(buffer && idct);
  789.    assert(source && intermediate);
  790.  
  791.    memset(buffer, 0, sizeof(struct vl_idct_buffer));
  792.  
  793.    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
  794.    pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
  795.    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
  796.    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
  797.  
  798.    if (!init_source(idct, buffer))
  799.       return false;
  800.  
  801.    if (!init_intermediate(idct, buffer))
  802.       return false;
  803.  
  804.    return true;
  805. }
  806.  
/**
 * Release everything held by a per-stream IDCT buffer.
 *
 * The source and intermediate sampler view references are dropped inside
 * cleanup_source()/cleanup_intermediate(); matrix and transpose are
 * released here directly.
 */
void
vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
{
   assert(buffer);

   cleanup_source(buffer);
   cleanup_intermediate(buffer);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
}
  818.  
/**
 * Render the mismatch-control pass (one point per instance, into the
 * source texture) followed by the first IDCT stage (one quad per
 * instance, into the intermediate render targets).  Second-stage state
 * is bound separately via vl_idct_prepare_stage2().
 */
void
vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
{
   assert(buffer);

   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
   idct->pipe->bind_blend_state(idct->pipe, idct->blend);

   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
                                   0, 2, idct->samplers);

   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2,
                                 buffer->sampler_views.stage[0]);

   /* mismatch control */
   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch);
   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);

   /* first stage */
   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport);
   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
   idct->pipe->bind_fs_state(idct->pipe, idct->fs);
   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
}
  847.  
/**
 * Bind the rasterizer, samplers and stage-2 sampler views in preparation
 * for the second IDCT stage; the actual draw is issued by the caller.
 */
void
vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   assert(buffer);

   /* second stage */
   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
                                   0, 2, idct->samplers);
   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT,
                                 0, 2, buffer->sampler_views.stage[1]);
}
  860.  
  861.