Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2010 Christian König
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include <assert.h>
  29.  
  30. #include "pipe/p_context.h"
  31. #include "pipe/p_screen.h"
  32.  
  33. #include "util/u_draw.h"
  34. #include "util/u_sampler.h"
  35. #include "util/u_memory.h"
  36.  
  37. #include "tgsi/tgsi_ureg.h"
  38.  
  39. #include "vl_defines.h"
  40. #include "vl_types.h"
  41. #include "vl_vertex_buffers.h"
  42. #include "vl_idct.h"
  43.  
  44. enum VS_OUTPUT
  45. {
  46.    VS_O_VPOS = 0,
  47.    VS_O_L_ADDR0 = 0,
  48.    VS_O_L_ADDR1,
  49.    VS_O_R_ADDR0,
  50.    VS_O_R_ADDR1
  51. };
  52.  
  53. /**
  54.  * The DCT matrix stored as hex representation of floats. Equal to the following equation:
  55.  * for (i = 0; i < 8; ++i)
  56.  *    for (j = 0; j < 8; ++j)
  57.  *       if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
  58.  *       else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
  59.  */
  60. static const uint32_t const_matrix[8][8] = {
  61.    { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
  62.    { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
  63.    { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
  64.    { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
  65.    { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
  66.    { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
  67.    { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
  68.    { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
  69. };
  70.  
  71. static void
  72. calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
  73.           struct ureg_src tc, struct ureg_src start, bool right_side,
  74.           bool transposed, float size)
  75. {
  76.    unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
  77.    unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
  78.  
  79.    unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
  80.    unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
  81.  
  82.    /*
  83.     * addr[0..1].(start) = right_side ? start.x : tc.x
  84.     * addr[0..1].(tc) = right_side ? tc.y : start.y
  85.     * addr[0..1].z = tc.z
  86.     * addr[1].(start) += 1.0f / scale
  87.     */
  88.    ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
  89.    ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
  90.  
  91.    ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
  92.    ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
  93. }
  94.  
  95. static void
  96. increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
  97.                struct ureg_src saddr[2], bool right_side, bool transposed,
  98.                int pos, float size)
  99. {
  100.    unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
  101.    unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
  102.  
  103.    /*
  104.     * daddr[0..1].(start) = saddr[0..1].(start)
  105.     * daddr[0..1].(tc) = saddr[0..1].(tc)
  106.     */
  107.  
  108.    ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
  109.    ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
  110.    ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
  111.    ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
  112. }
  113.  
  114. static void
  115. fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
  116.            struct ureg_src sampler, bool resource3d)
  117. {
  118.    ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
  119.    ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
  120. }
  121.  
  122. static void
  123. matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
  124. {
  125.    struct ureg_dst tmp;
  126.  
  127.    tmp = ureg_DECL_temporary(shader);
  128.  
  129.    /*
  130.     * tmp.xy = dot4(m[0][0..1], m[1][0..1])
  131.     * dst = tmp.x + tmp.y
  132.     */
  133.    ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
  134.    ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
  135.    ureg_ADD(shader, dst,
  136.       ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
  137.       ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
  138.  
  139.    ureg_release_temporary(shader, tmp);
  140. }
  141.  
  142. static void *
  143. create_mismatch_vert_shader(struct vl_idct *idct)
  144. {
  145.    struct ureg_program *shader;
  146.    struct ureg_src vpos;
  147.    struct ureg_src scale;
  148.    struct ureg_dst t_tex;
  149.    struct ureg_dst o_vpos, o_addr[2];
  150.  
  151.    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
  152.    if (!shader)
  153.       return NULL;
  154.  
  155.    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
  156.  
  157.    t_tex = ureg_DECL_temporary(shader);
  158.  
  159.    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
  160.  
  161.    o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
  162.    o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
  163.  
  164.    /*
  165.     * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
  166.     *
  167.     * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
  168.     * o_vpos.xy = t_vpos * scale
  169.     *
  170.     * o_addr = calc_addr(...)
  171.     *
  172.     */
  173.  
  174.    scale = ureg_imm2f(shader,
  175.       (float)VL_BLOCK_WIDTH / idct->buffer_width,
  176.       (float)VL_BLOCK_HEIGHT / idct->buffer_height);
  177.  
  178.    ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
  179.    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
  180.  
  181.    ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
  182.    calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
  183.  
  184.    ureg_release_temporary(shader, t_tex);
  185.  
  186.    ureg_END(shader);
  187.  
  188.    return ureg_create_shader_and_destroy(shader, idct->pipe);
  189. }
  190.  
  191. static void *
  192. create_mismatch_frag_shader(struct vl_idct *idct)
  193. {
  194.    struct ureg_program *shader;
  195.  
  196.    struct ureg_src addr[2];
  197.  
  198.    struct ureg_dst m[8][2];
  199.    struct ureg_dst fragment;
  200.  
  201.    unsigned i;
  202.  
  203.    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
  204.    if (!shader)
  205.       return NULL;
  206.  
  207.    addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
  208.    addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
  209.  
  210.    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
  211.  
  212.    for (i = 0; i < 8; ++i) {
  213.       m[i][0] = ureg_DECL_temporary(shader);
  214.       m[i][1] = ureg_DECL_temporary(shader);
  215.    }
  216.  
  217.    for (i = 0; i < 8; ++i) {
  218.       increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
  219.    }
  220.  
  221.    for (i = 0; i < 8; ++i) {
  222.       struct ureg_src s_addr[2];
  223.       s_addr[0] = ureg_src(m[i][0]);
  224.       s_addr[1] = ureg_src(m[i][1]);
  225.       fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
  226.    }
  227.  
  228.    for (i = 1; i < 8; ++i) {
  229.       ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
  230.       ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
  231.    }
  232.  
  233.    ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
  234.    ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
  235.  
  236.    ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
  237.    ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
  238.    ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
  239.  
  240.    ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
  241.             ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
  242.    ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
  243.             ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
  244.  
  245.    ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
  246.    ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));
  247.  
  248.    for (i = 0; i < 8; ++i) {
  249.       ureg_release_temporary(shader, m[i][0]);
  250.       ureg_release_temporary(shader, m[i][1]);
  251.    }
  252.  
  253.    ureg_END(shader);
  254.  
  255.    return ureg_create_shader_and_destroy(shader, idct->pipe);
  256. }
  257.  
  258. static void *
  259. create_stage1_vert_shader(struct vl_idct *idct)
  260. {
  261.    struct ureg_program *shader;
  262.    struct ureg_src vrect, vpos;
  263.    struct ureg_src scale;
  264.    struct ureg_dst t_tex, t_start;
  265.    struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
  266.  
  267.    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
  268.    if (!shader)
  269.       return NULL;
  270.  
  271.    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
  272.    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
  273.  
  274.    t_tex = ureg_DECL_temporary(shader);
  275.    t_start = ureg_DECL_temporary(shader);
  276.  
  277.    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
  278.  
  279.    o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
  280.    o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
  281.  
  282.    o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
  283.    o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);
  284.  
  285.    /*
  286.     * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
  287.     *
  288.     * t_vpos = vpos + vrect
  289.     * o_vpos.xy = t_vpos * scale
  290.     * o_vpos.zw = vpos
  291.     *
  292.     * o_l_addr = calc_addr(...)
  293.     * o_r_addr = calc_addr(...)
  294.     *
  295.     */
  296.  
  297.    scale = ureg_imm2f(shader,
  298.       (float)VL_BLOCK_WIDTH / idct->buffer_width,
  299.       (float)VL_BLOCK_HEIGHT / idct->buffer_height);
  300.  
  301.    ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
  302.    ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
  303.  
  304.    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
  305.    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
  306.  
  307.    ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
  308.  
  309.    calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
  310.    calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);
  311.  
  312.    ureg_release_temporary(shader, t_tex);
  313.    ureg_release_temporary(shader, t_start);
  314.  
  315.    ureg_END(shader);
  316.  
  317.    return ureg_create_shader_and_destroy(shader, idct->pipe);
  318. }
  319.  
  320. static void *
  321. create_stage1_frag_shader(struct vl_idct *idct)
  322. {
  323.    struct ureg_program *shader;
  324.  
  325.    struct ureg_src l_addr[2], r_addr[2];
  326.  
  327.    struct ureg_dst l[4][2], r[2];
  328.    struct ureg_dst *fragment;
  329.  
  330.    int i, j;
  331.  
  332.    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
  333.    if (!shader)
  334.       return NULL;
  335.  
  336.    fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));
  337.  
  338.    l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
  339.    l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
  340.  
  341.    r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
  342.    r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
  343.  
  344.    for (i = 0; i < idct->nr_of_render_targets; ++i)
  345.        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
  346.  
  347.    for (i = 0; i < 4; ++i) {
  348.       l[i][0] = ureg_DECL_temporary(shader);
  349.       l[i][1] = ureg_DECL_temporary(shader);
  350.    }
  351.  
  352.    r[0] = ureg_DECL_temporary(shader);
  353.    r[1] = ureg_DECL_temporary(shader);
  354.  
  355.    for (i = 0; i < 4; ++i) {
  356.       increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
  357.    }
  358.  
  359.    for (i = 0; i < 4; ++i) {
  360.       struct ureg_src s_addr[2];
  361.       s_addr[0] = ureg_src(l[i][0]);
  362.       s_addr[1] = ureg_src(l[i][1]);
  363.       fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
  364.    }
  365.  
  366.    for (i = 0; i < idct->nr_of_render_targets; ++i) {
  367.       struct ureg_src s_addr[2];
  368.  
  369.       increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);
  370.  
  371.       s_addr[0] = ureg_src(r[0]);
  372.       s_addr[1] = ureg_src(r[1]);
  373.       fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
  374.  
  375.       for (j = 0; j < 4; ++j) {
  376.          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
  377.       }
  378.    }
  379.  
  380.    for (i = 0; i < 4; ++i) {
  381.       ureg_release_temporary(shader, l[i][0]);
  382.       ureg_release_temporary(shader, l[i][1]);
  383.    }
  384.    ureg_release_temporary(shader, r[0]);
  385.    ureg_release_temporary(shader, r[1]);
  386.  
  387.    ureg_END(shader);
  388.  
  389.    FREE(fragment);
  390.  
  391.    return ureg_create_shader_and_destroy(shader, idct->pipe);
  392. }
  393.  
  394. void
  395. vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
  396.                            unsigned first_output, struct ureg_dst tex)
  397. {
  398.    struct ureg_src vrect, vpos;
  399.    struct ureg_src scale;
  400.    struct ureg_dst t_start;
  401.    struct ureg_dst o_l_addr[2], o_r_addr[2];
  402.  
  403.    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
  404.    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
  405.  
  406.    t_start = ureg_DECL_temporary(shader);
  407.  
  408.    --first_output;
  409.  
  410.    o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
  411.    o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);
  412.  
  413.    o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
  414.    o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);
  415.  
  416.    scale = ureg_imm2f(shader,
  417.       (float)VL_BLOCK_WIDTH / idct->buffer_width,
  418.       (float)VL_BLOCK_HEIGHT / idct->buffer_height);
  419.  
  420.    ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
  421.       ureg_scalar(vrect, TGSI_SWIZZLE_X),
  422.       ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
  423.    ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
  424.  
  425.    calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
  426.    calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
  427.  
  428.    ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
  429.    ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
  430. }
  431.  
  432. void
  433. vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
  434.                            unsigned first_input, struct ureg_dst fragment)
  435. {
  436.    struct ureg_src l_addr[2], r_addr[2];
  437.  
  438.    struct ureg_dst l[2], r[2];
  439.  
  440.    --first_input;
  441.  
  442.    l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
  443.    l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
  444.  
  445.    r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
  446.    r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
  447.  
  448.    l[0] = ureg_DECL_temporary(shader);
  449.    l[1] = ureg_DECL_temporary(shader);
  450.    r[0] = ureg_DECL_temporary(shader);
  451.    r[1] = ureg_DECL_temporary(shader);
  452.  
  453.    fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
  454.    fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
  455.  
  456.    matrix_mul(shader, fragment, l, r);
  457.  
  458.    ureg_release_temporary(shader, l[0]);
  459.    ureg_release_temporary(shader, l[1]);
  460.    ureg_release_temporary(shader, r[0]);
  461.    ureg_release_temporary(shader, r[1]);
  462. }
  463.  
  464. static bool
  465. init_shaders(struct vl_idct *idct)
  466. {
  467.    idct->vs_mismatch = create_mismatch_vert_shader(idct);
  468.    if (!idct->vs_mismatch)
  469.       goto error_vs_mismatch;
  470.  
  471.    idct->fs_mismatch = create_mismatch_frag_shader(idct);
  472.    if (!idct->fs_mismatch)
  473.       goto error_fs_mismatch;
  474.  
  475.    idct->vs = create_stage1_vert_shader(idct);
  476.    if (!idct->vs)
  477.       goto error_vs;
  478.  
  479.    idct->fs = create_stage1_frag_shader(idct);
  480.    if (!idct->fs)
  481.       goto error_fs;
  482.  
  483.    return true;
  484.  
  485. error_fs:
  486.    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
  487.  
  488. error_vs:
  489.    idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
  490.  
  491. error_fs_mismatch:
  492.    idct->pipe->delete_vs_state(idct->pipe, idct->fs);
  493.  
  494. error_vs_mismatch:
  495.    return false;
  496. }
  497.  
  498. static void
  499. cleanup_shaders(struct vl_idct *idct)
  500. {
  501.    idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
  502.    idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
  503.    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
  504.    idct->pipe->delete_fs_state(idct->pipe, idct->fs);
  505. }
  506.  
  507. static bool
  508. init_state(struct vl_idct *idct)
  509. {
  510.    struct pipe_blend_state blend;
  511.    struct pipe_rasterizer_state rs_state;
  512.    struct pipe_sampler_state sampler;
  513.    unsigned i;
  514.  
  515.    assert(idct);
  516.  
  517.    memset(&rs_state, 0, sizeof(rs_state));
  518.    rs_state.point_size = 1;
  519.    rs_state.half_pixel_center = true;
  520.    rs_state.bottom_edge_rule = true;
  521.    rs_state.depth_clip = 1;
  522.    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
  523.    if (!idct->rs_state)
  524.       goto error_rs_state;
  525.  
  526.    memset(&blend, 0, sizeof blend);
  527.  
  528.    blend.independent_blend_enable = 0;
  529.    blend.rt[0].blend_enable = 0;
  530.    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
  531.    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
  532.    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
  533.    blend.rt[0].alpha_func = PIPE_BLEND_ADD;
  534.    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
  535.    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
  536.    blend.logicop_enable = 0;
  537.    blend.logicop_func = PIPE_LOGICOP_CLEAR;
  538.    /* Needed to allow color writes to FB, even if blending disabled */
  539.    blend.rt[0].colormask = PIPE_MASK_RGBA;
  540.    blend.dither = 0;
  541.    idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
  542.    if (!idct->blend)
  543.       goto error_blend;
  544.  
  545.    for (i = 0; i < 2; ++i) {
  546.       memset(&sampler, 0, sizeof(sampler));
  547.       sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
  548.       sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
  549.       sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
  550.       sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
  551.       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
  552.       sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
  553.       sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
  554.       sampler.compare_func = PIPE_FUNC_ALWAYS;
  555.       sampler.normalized_coords = 1;
  556.       idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
  557.       if (!idct->samplers[i])
  558.          goto error_samplers;
  559.    }
  560.  
  561.    return true;
  562.  
  563. error_samplers:
  564.    for (i = 0; i < 2; ++i)
  565.       if (idct->samplers[i])
  566.          idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
  567.  
  568.    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
  569.  
  570. error_blend:
  571.    idct->pipe->delete_blend_state(idct->pipe, idct->blend);
  572.  
  573. error_rs_state:
  574.    return false;
  575. }
  576.  
  577. static void
  578. cleanup_state(struct vl_idct *idct)
  579. {
  580.    unsigned i;
  581.  
  582.    for (i = 0; i < 2; ++i)
  583.       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
  584.  
  585.    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
  586.    idct->pipe->delete_blend_state(idct->pipe, idct->blend);
  587. }
  588.  
  589. static bool
  590. init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
  591. {
  592.    struct pipe_resource *tex;
  593.    struct pipe_surface surf_templ;
  594.  
  595.    assert(idct && buffer);
  596.  
  597.    tex = buffer->sampler_views.individual.source->texture;
  598.  
  599.    buffer->fb_state_mismatch.width = tex->width0;
  600.    buffer->fb_state_mismatch.height = tex->height0;
  601.    buffer->fb_state_mismatch.nr_cbufs = 1;
  602.  
  603.    memset(&surf_templ, 0, sizeof(surf_templ));
  604.    surf_templ.format = tex->format;
  605.    surf_templ.u.tex.first_layer = 0;
  606.    surf_templ.u.tex.last_layer = 0;
  607.    buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
  608.  
  609.    buffer->viewport_mismatch.scale[0] = tex->width0;
  610.    buffer->viewport_mismatch.scale[1] = tex->height0;
  611.    buffer->viewport_mismatch.scale[2] = 1;
  612.    buffer->viewport_mismatch.scale[3] = 1;
  613.  
  614.    return true;
  615. }
  616.  
  617. static void
  618. cleanup_source(struct vl_idct_buffer *buffer)
  619. {
  620.    assert(buffer);
  621.  
  622.    pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
  623.  
  624.    pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
  625. }
  626.  
  627. static bool
  628. init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
  629. {
  630.    struct pipe_resource *tex;
  631.    struct pipe_surface surf_templ;
  632.    unsigned i;
  633.  
  634.    assert(idct && buffer);
  635.  
  636.    tex = buffer->sampler_views.individual.intermediate->texture;
  637.  
  638.    buffer->fb_state.width = tex->width0;
  639.    buffer->fb_state.height = tex->height0;
  640.    buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
  641.    for(i = 0; i < idct->nr_of_render_targets; ++i) {
  642.       memset(&surf_templ, 0, sizeof(surf_templ));
  643.       surf_templ.format = tex->format;
  644.       surf_templ.u.tex.first_layer = i;
  645.       surf_templ.u.tex.last_layer = i;
  646.       buffer->fb_state.cbufs[i] = idct->pipe->create_surface(
  647.          idct->pipe, tex, &surf_templ);
  648.  
  649.       if (!buffer->fb_state.cbufs[i])
  650.          goto error_surfaces;
  651.    }
  652.  
  653.    buffer->viewport.scale[0] = tex->width0;
  654.    buffer->viewport.scale[1] = tex->height0;
  655.    buffer->viewport.scale[2] = 1;
  656.    buffer->viewport.scale[3] = 1;
  657.  
  658.    return true;
  659.  
  660. error_surfaces:
  661.    for(i = 0; i < idct->nr_of_render_targets; ++i)
  662.       pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
  663.  
  664.    return false;
  665. }
  666.  
  667. static void
  668. cleanup_intermediate(struct vl_idct_buffer *buffer)
  669. {
  670.    unsigned i;
  671.  
  672.    assert(buffer);
  673.  
  674.    for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
  675.       pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
  676.  
  677.    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
  678. }
  679.  
  680. struct pipe_sampler_view *
  681. vl_idct_upload_matrix(struct pipe_context *pipe, float scale)
  682. {
  683.    struct pipe_resource tex_templ, *matrix;
  684.    struct pipe_sampler_view sv_templ, *sv;
  685.    struct pipe_transfer *buf_transfer;
  686.    unsigned i, j, pitch;
  687.    float *f;
  688.  
  689.    struct pipe_box rect =
  690.    {
  691.       0, 0, 0,
  692.       VL_BLOCK_WIDTH / 4,
  693.       VL_BLOCK_HEIGHT,
  694.       1
  695.    };
  696.  
  697.    assert(pipe);
  698.  
  699.    memset(&tex_templ, 0, sizeof(tex_templ));
  700.    tex_templ.target = PIPE_TEXTURE_2D;
  701.    tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
  702.    tex_templ.last_level = 0;
  703.    tex_templ.width0 = 2;
  704.    tex_templ.height0 = 8;
  705.    tex_templ.depth0 = 1;
  706.    tex_templ.array_size = 1;
  707.    tex_templ.usage = PIPE_USAGE_IMMUTABLE;
  708.    tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
  709.    tex_templ.flags = 0;
  710.  
  711.    matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
  712.    if (!matrix)
  713.       goto error_matrix;
  714.  
  715.    f = pipe->transfer_map(pipe, matrix, 0,
  716.                                      PIPE_TRANSFER_WRITE |
  717.                                      PIPE_TRANSFER_DISCARD_RANGE,
  718.                                      &rect, &buf_transfer);
  719.    if (!f)
  720.       goto error_map;
  721.  
  722.    pitch = buf_transfer->stride / sizeof(float);
  723.  
  724.    for(i = 0; i < VL_BLOCK_HEIGHT; ++i)
  725.       for(j = 0; j < VL_BLOCK_WIDTH; ++j)
  726.          // transpose and scale
  727.          f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;
  728.  
  729.    pipe->transfer_unmap(pipe, buf_transfer);
  730.  
  731.    memset(&sv_templ, 0, sizeof(sv_templ));
  732.    u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
  733.    sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
  734.    pipe_resource_reference(&matrix, NULL);
  735.    if (!sv)
  736.       goto error_map;
  737.  
  738.    return sv;
  739.  
  740. error_map:
  741.    pipe_resource_reference(&matrix, NULL);
  742.  
  743. error_matrix:
  744.    return NULL;
  745. }
  746.  
  747. bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
  748.                   unsigned buffer_width, unsigned buffer_height,
  749.                   unsigned nr_of_render_targets,
  750.                   struct pipe_sampler_view *matrix,
  751.                   struct pipe_sampler_view *transpose)
  752. {
  753.    assert(idct && pipe);
  754.    assert(matrix && transpose);
  755.  
  756.    idct->pipe = pipe;
  757.    idct->buffer_width = buffer_width;
  758.    idct->buffer_height = buffer_height;
  759.    idct->nr_of_render_targets = nr_of_render_targets;
  760.  
  761.    pipe_sampler_view_reference(&idct->matrix, matrix);
  762.    pipe_sampler_view_reference(&idct->transpose, transpose);
  763.  
  764.    if(!init_shaders(idct))
  765.       return false;
  766.  
  767.    if(!init_state(idct)) {
  768.       cleanup_shaders(idct);
  769.       return false;
  770.    }
  771.  
  772.    return true;
  773. }
  774.  
  775. void
  776. vl_idct_cleanup(struct vl_idct *idct)
  777. {
  778.    cleanup_shaders(idct);
  779.    cleanup_state(idct);
  780.  
  781.    pipe_sampler_view_reference(&idct->matrix, NULL);
  782.    pipe_sampler_view_reference(&idct->transpose, NULL);
  783. }
  784.  
  785. bool
  786. vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
  787.                     struct pipe_sampler_view *source,
  788.                     struct pipe_sampler_view *intermediate)
  789. {
  790.    assert(buffer && idct);
  791.    assert(source && intermediate);
  792.  
  793.    memset(buffer, 0, sizeof(struct vl_idct_buffer));
  794.  
  795.    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
  796.    pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
  797.    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
  798.    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
  799.  
  800.    if (!init_source(idct, buffer))
  801.       return false;
  802.  
  803.    if (!init_intermediate(idct, buffer))
  804.       return false;
  805.  
  806.    return true;
  807. }
  808.  
  809. void
  810. vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
  811. {
  812.    assert(buffer);
  813.  
  814.    cleanup_source(buffer);
  815.    cleanup_intermediate(buffer);
  816.  
  817.    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
  818.    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
  819. }
  820.  
  821. void
  822. vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
  823. {
  824.    assert(buffer);
  825.  
  826.    idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
  827.    idct->pipe->bind_blend_state(idct->pipe, idct->blend);
  828.    idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
  829.    idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
  830.  
  831.    /* mismatch control */
  832.    idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
  833.    idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch);
  834.    idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
  835.    idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
  836.    util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
  837.  
  838.    /* first stage */
  839.    idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
  840.    idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport);
  841.    idct->pipe->bind_vs_state(idct->pipe, idct->vs);
  842.    idct->pipe->bind_fs_state(idct->pipe, idct->fs);
  843.    util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
  844. }
  845.  
  846. void
  847. vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
  848. {
  849.    assert(buffer);
  850.  
  851.    /* second stage */
  852.    idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
  853.    idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
  854.    idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
  855. }
  856.  
  857.