Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include "i915_reg.h"
  29. #include "i915_context.h"
  30. #include "i915_fpc.h"
  31. #include "util/u_math.h"
  32.  
  33. uint
  34. i915_get_temp(struct i915_fp_compile *p)
  35. {
  36.    int bit = ffs(~p->temp_flag);
  37.    if (!bit) {
  38.       i915_program_error(p, "i915_get_temp: out of temporaries");
  39.       return 0;
  40.    }
  41.  
  42.    p->temp_flag |= 1 << (bit - 1);
  43.    return bit - 1;
  44. }
  45.  
  46.  
  47. static void
  48. i915_release_temp(struct i915_fp_compile *p, int reg)
  49. {
  50.    p->temp_flag &= ~(1 << reg);
  51. }
  52.  
  53.  
  54. /**
  55.  * Get unpreserved temporary, a temp whose value is not preserved between
  56.  * PS program phases.
  57.  */
  58. uint
  59. i915_get_utemp(struct i915_fp_compile * p)
  60. {
  61.    int bit = ffs(~p->utemp_flag);
  62.    if (!bit) {
  63.       i915_program_error(p, "i915_get_utemp: out of temporaries");
  64.       return 0;
  65.    }
  66.  
  67.    p->utemp_flag |= 1 << (bit - 1);
  68.    return UREG(REG_TYPE_U, (bit - 1));
  69. }
  70.  
  71. void
  72. i915_release_utemps(struct i915_fp_compile *p)
  73. {
  74.    p->utemp_flag = ~0x7;
  75. }
  76.  
  77.  
  78. uint
  79. i915_emit_decl(struct i915_fp_compile *p,
  80.                uint type, uint nr, uint d0_flags)
  81. {
  82.    uint reg = UREG(type, nr);
  83.  
  84.    if (type == REG_TYPE_T) {
  85.       if (p->decl_t & (1 << nr))
  86.          return reg;
  87.  
  88.       p->decl_t |= (1 << nr);
  89.    }
  90.    else if (type == REG_TYPE_S) {
  91.       if (p->decl_s & (1 << nr))
  92.          return reg;
  93.  
  94.       p->decl_s |= (1 << nr);
  95.    }
  96.    else
  97.       return reg;
  98.  
  99.    if (p->decl< p->declarations + I915_PROGRAM_SIZE) {
  100.       *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
  101.       *(p->decl++) = D1_MBZ;
  102.       *(p->decl++) = D2_MBZ;
  103.    }
  104.    else
  105.       i915_program_error(p, "Out of declarations");
  106.  
  107.    p->nr_decl_insn++;
  108.    return reg;
  109. }
  110.  
  111. uint
  112. i915_emit_arith(struct i915_fp_compile * p,
  113.                 uint op,
  114.                 uint dest,
  115.                 uint mask,
  116.                 uint saturate, uint src0, uint src1, uint src2)
  117. {
  118.    uint c[3];
  119.    uint nr_const = 0;
  120.  
  121.    assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
  122.    dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
  123.    assert(dest);
  124.  
  125.    if (GET_UREG_TYPE(src0) == REG_TYPE_CONST)
  126.       c[nr_const++] = 0;
  127.    if (GET_UREG_TYPE(src1) == REG_TYPE_CONST)
  128.       c[nr_const++] = 1;
  129.    if (GET_UREG_TYPE(src2) == REG_TYPE_CONST)
  130.       c[nr_const++] = 2;
  131.  
  132.    /* Recursively call this function to MOV additional const values
  133.     * into temporary registers.  Use utemp registers for this -
  134.     * currently shouldn't be possible to run out, but keep an eye on
  135.     * this.
  136.     */
  137.    if (nr_const > 1) {
  138.       uint s[3], first, i, old_utemp_flag;
  139.  
  140.       s[0] = src0;
  141.       s[1] = src1;
  142.       s[2] = src2;
  143.       old_utemp_flag = p->utemp_flag;
  144.  
  145.       first = GET_UREG_NR(s[c[0]]);
  146.       for (i = 1; i < nr_const; i++) {
  147.          if (GET_UREG_NR(s[c[i]]) != first) {
  148.             uint tmp = i915_get_utemp(p);
  149.  
  150.             i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
  151.                             s[c[i]], 0, 0);
  152.             s[c[i]] = tmp;
  153.          }
  154.       }
  155.  
  156.       src0 = s[0];
  157.       src1 = s[1];
  158.       src2 = s[2];
  159.       p->utemp_flag = old_utemp_flag;   /* restore */
  160.    }
  161.  
  162.    if (p->csr< p->program + I915_PROGRAM_SIZE) {
  163.       *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
  164.       *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
  165.       *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
  166.    }
  167.    else
  168.       i915_program_error(p, "Out of instructions");
  169.  
  170.    if (GET_UREG_TYPE(dest) == REG_TYPE_R)
  171.       p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
  172.  
  173.    p->nr_alu_insn++;
  174.    return dest;
  175. }
  176.  
  177.  
  178. /**
  179.  * Emit a texture load or texkill instruction.
  180.  * \param dest  the dest i915 register
  181.  * \param destmask  the dest register writemask
  182.  * \param sampler  the i915 sampler register
  183.  * \param coord  the i915 source texcoord operand
  184.  * \param opcode  the instruction opcode
  185.  */
  186. uint i915_emit_texld( struct i915_fp_compile *p,
  187.                       uint dest,
  188.                       uint destmask,
  189.                       uint sampler,
  190.                       uint coord,
  191.                       uint opcode,
  192.                       uint num_coord )
  193. {
  194.    const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
  195.  
  196.    int temp = -1;
  197.    uint ignore = 0;
  198.  
  199.    /* Eliminate the useless texture coordinates. Otherwise we end up generating
  200.     * a swizzle for no reason below. */
  201.    switch(num_coord) {
  202.       case 0:
  203.          /* Ignore x */
  204.          ignore |= (0xf << UREG_CHANNEL_X_SHIFT);
  205.       case 1:
  206.          /* Ignore y */
  207.          ignore |= (0xf << UREG_CHANNEL_Y_SHIFT);
  208.       case 2:
  209.          /* Ignore z */
  210.          ignore |= (0xf << UREG_CHANNEL_Z_SHIFT);
  211.       case 3:
  212.          /* Ignore w */
  213.          ignore |= (0xf << UREG_CHANNEL_W_SHIFT);
  214.    }
  215.  
  216.    if ( (coord & ~ignore ) != (k & ~ignore) ) {
  217.       /* texcoord is swizzled or negated.  Need to allocate a new temporary
  218.        * register (a utemp / unpreserved temp) won't do.
  219.        */
  220.       uint tempReg;
  221.  
  222.       temp = i915_get_temp(p);           /* get temp reg index */
  223.       tempReg = UREG(REG_TYPE_R, temp);  /* make i915 register */
  224.  
  225.       i915_emit_arith( p, A0_MOV,
  226.                        tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
  227.                        0,                            /* saturate */
  228.                        coord, 0, 0 );                /* src0, src1, src2 */
  229.  
  230.       /* new src texcoord is tempReg */
  231.       coord = tempReg;
  232.    }
  233.  
  234.    /* Don't worry about saturate as we only support  
  235.     */
  236.    if (destmask != A0_DEST_CHANNEL_ALL) {
  237.       /* if not writing to XYZW... */
  238.       uint tmp = i915_get_utemp(p);
  239.       i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode, num_coord );
  240.       i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
  241.       /* XXX release utemp here? */
  242.    }
  243.    else {
  244.       assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
  245.       assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
  246.  
  247.       /* Output register being oC or oD defines a phase boundary */
  248.       if (GET_UREG_TYPE(dest) == REG_TYPE_OC ||
  249.           GET_UREG_TYPE(dest) == REG_TYPE_OD)
  250.          p->nr_tex_indirect++;
  251.  
  252.       /* Reading from an r# register whose contents depend on output of the
  253.        * current phase defines a phase boundary.
  254.        */
  255.       if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
  256.           p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
  257.          p->nr_tex_indirect++;
  258.  
  259.       if (p->csr< p->program + I915_PROGRAM_SIZE) {
  260.          *(p->csr++) = (opcode |
  261.                         T0_DEST( dest ) |
  262.                         T0_SAMPLER( sampler ));
  263.  
  264.          *(p->csr++) = T1_ADDRESS_REG( coord );
  265.          *(p->csr++) = T2_MBZ;
  266.       }
  267.       else
  268.          i915_program_error(p, "Out of instructions");
  269.  
  270.       if (GET_UREG_TYPE(dest) == REG_TYPE_R)
  271.          p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
  272.  
  273.       p->nr_tex_insn++;
  274.    }
  275.  
  276.    if (temp >= 0)
  277.       i915_release_temp(p, temp);
  278.  
  279.    return dest;
  280. }
  281.  
  282.  
  283. uint
  284. i915_emit_const1f(struct i915_fp_compile * p, float c0)
  285. {
  286.    struct i915_fragment_shader *ifs = p->shader;
  287.    unsigned reg, idx;
  288.  
  289.    if (c0 == 0.0)
  290.       return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
  291.    if (c0 == 1.0)
  292.       return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
  293.  
  294.    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
  295.       if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
  296.          continue;
  297.       for (idx = 0; idx < 4; idx++) {
  298.          if (!(ifs->constant_flags[reg] & (1 << idx)) ||
  299.              ifs->constants[reg][idx] == c0) {
  300.             ifs->constants[reg][idx] = c0;
  301.             ifs->constant_flags[reg] |= 1 << idx;
  302.             if (reg + 1 > ifs->num_constants)
  303.                ifs->num_constants = reg + 1;
  304.             return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
  305.          }
  306.       }
  307.    }
  308.  
  309.    i915_program_error(p, "i915_emit_const1f: out of constants");
  310.    return 0;
  311. }
  312.  
  313. uint
  314. i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
  315. {
  316.    struct i915_fragment_shader *ifs = p->shader;
  317.    unsigned reg, idx;
  318.  
  319.    if (c0 == 0.0)
  320.       return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
  321.    if (c0 == 1.0)
  322.       return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);
  323.  
  324.    if (c1 == 0.0)
  325.       return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
  326.    if (c1 == 1.0)
  327.       return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
  328.  
  329.    // XXX emit swizzle here for 0, 1, -1 and any combination thereof
  330.    // we can use swizzle + neg for that
  331.    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
  332.       if (ifs->constant_flags[reg] == 0xf ||
  333.           ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
  334.          continue;
  335.       for (idx = 0; idx < 3; idx++) {
  336.          if (!(ifs->constant_flags[reg] & (3 << idx))) {
  337.             ifs->constants[reg][idx + 0] = c0;
  338.             ifs->constants[reg][idx + 1] = c1;
  339.             ifs->constant_flags[reg] |= 3 << idx;
  340.             if (reg + 1 > ifs->num_constants)
  341.                ifs->num_constants = reg + 1;
  342.             return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
  343.          }
  344.       }
  345.    }
  346.  
  347.    i915_program_error(p, "i915_emit_const2f: out of constants");
  348.    return 0;
  349. }
  350.  
  351. uint
  352. i915_emit_const4f(struct i915_fp_compile * p,
  353.                   float c0, float c1, float c2, float c3)
  354. {
  355.    struct i915_fragment_shader *ifs = p->shader;
  356.    unsigned reg;
  357.  
  358.    // XXX emit swizzle here for 0, 1, -1 and any combination thereof
  359.    // we can use swizzle + neg for that
  360.    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
  361.       if (ifs->constant_flags[reg] == 0xf &&
  362.           ifs->constants[reg][0] == c0 &&
  363.           ifs->constants[reg][1] == c1 &&
  364.           ifs->constants[reg][2] == c2 &&
  365.           ifs->constants[reg][3] == c3) {
  366.          return UREG(REG_TYPE_CONST, reg);
  367.       }
  368.       else if (ifs->constant_flags[reg] == 0) {
  369.  
  370.          ifs->constants[reg][0] = c0;
  371.          ifs->constants[reg][1] = c1;
  372.          ifs->constants[reg][2] = c2;
  373.          ifs->constants[reg][3] = c3;
  374.          ifs->constant_flags[reg] = 0xf;
  375.          if (reg + 1 > ifs->num_constants)
  376.             ifs->num_constants = reg + 1;
  377.          return UREG(REG_TYPE_CONST, reg);
  378.       }
  379.    }
  380.  
  381.    i915_program_error(p, "i915_emit_const4f: out of constants");
  382.    return 0;
  383. }
  384.  
  385.  
  386. uint
  387. i915_emit_const4fv(struct i915_fp_compile * p, const float * c)
  388. {
  389.    return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
  390. }
  391.