Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
  23.  
/**
 * \file brw_wm_channel_expressions.cpp
 *
 * Breaks vector operations down into operations on each component.
 *
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
 * channel of a vector is laid out as 1 or 2 8-float registers.  Each
 * ALU operation operates on one of those channel registers.  As a
 * result, there is no value to the 965 fragment shader in tracking
 * "vector" expressions in the sense of GLSL fragment shaders, when
 * doing a channel at a time may help in constant folding, algebraic
 * simplification, and reducing the liveness of channel registers.
 *
 * The exception to the desire to break everything down to floats is
 * texturing.  The texture sampler returns a writemasked
 * 4/8-register sequence containing the texture values.  We don't want
 * to dispatch to the sampler separately for each channel we need, so
 * we do retain the vector types in that case.
 */
  43.  
  44. #include "main/core.h"
  45. #include "brw_wm.h"
  46. #include "glsl/ir.h"
  47. #include "glsl/ir_expression_flattening.h"
  48. #include "glsl/glsl_types.h"
  49.  
/**
 * IR visitor that rewrites assignments of vector ir_expressions into a
 * series of per-channel scalar assignments.
 */
class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
   ir_channel_expressions_visitor()
   {
      this->progress = false;
      this->mem_ctx = NULL;
   }

   ir_visitor_status visit_leave(ir_assignment *);

   /* Returns a dereference of channel \p element of \p var, or of the
    * whole variable when \p var is scalar.
    */
   ir_rvalue *get_element(ir_variable *var, unsigned int element);
   /* Inserts, before \p ir, an assignment of \p val into channel \p elem
    * of \p ir's LHS.
    */
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);

   /* True once any vector expression has been broken into channels. */
   bool progress;
   /* ralloc context for new IR nodes; picked up lazily from the first
    * instruction visited (see visit_leave).
    */
   void *mem_ctx;
};
  66.  
  67. static bool
  68. channel_expressions_predicate(ir_instruction *ir)
  69. {
  70.    ir_expression *expr = ir->as_expression();
  71.    unsigned int i;
  72.  
  73.    if (!expr)
  74.       return false;
  75.  
  76.    switch (expr->operation) {
  77.       /* these opcodes need to act on the whole vector,
  78.        * just like texturing.
  79.        */
  80.       case ir_unop_interpolate_at_centroid:
  81.       case ir_binop_interpolate_at_offset:
  82.       case ir_binop_interpolate_at_sample:
  83.          return false;
  84.       default:
  85.          break;
  86.    }
  87.  
  88.    for (i = 0; i < expr->get_num_operands(); i++) {
  89.       if (expr->operands[i]->type->is_vector())
  90.          return true;
  91.    }
  92.  
  93.    return false;
  94. }
  95.  
/**
 * Entry point of the pass: scalarizes vector expressions in
 * \p instructions.
 *
 * \return true if any instruction was changed.
 */
bool
brw_do_channel_expressions(exec_list *instructions)
{
   ir_channel_expressions_visitor v;

   /* Pull out any matrix expression to a separate assignment to a
    * temp.  This will make our handling of the breakdown to
    * operations on the matrix's vector components much easier.
    */
   do_expression_flattening(instructions, channel_expressions_predicate);

   visit_list_elements(&v, instructions);

   return v.progress;
}
  111.  
  112. ir_rvalue *
  113. ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
  114. {
  115.    ir_dereference *deref;
  116.  
  117.    if (var->type->is_scalar())
  118.       return new(mem_ctx) ir_dereference_variable(var);
  119.  
  120.    assert(elem < var->type->components());
  121.    deref = new(mem_ctx) ir_dereference_variable(var);
  122.    return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
  123. }
  124.  
/**
 * Inserts, before \p ir, an assignment of the scalar \p val into channel
 * \p elem of \p ir's original LHS.
 */
void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
   ir_assignment *assign;

   /* This assign-of-expression should have been generated by the
    * expression flattening visitor (since we never short circuit to
    * not flatten, even for plain assignments of variables), so the
    * writemask is always full.
    */
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);

   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
   ir->insert_before(assign);
}
  141.  
  142. ir_visitor_status
  143. ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
  144. {
  145.    ir_expression *expr = ir->rhs->as_expression();
  146.    bool found_vector = false;
  147.    unsigned int i, vector_elements = 1;
  148.    ir_variable *op_var[3];
  149.  
  150.    if (!expr)
  151.       return visit_continue;
  152.  
  153.    if (!this->mem_ctx)
  154.       this->mem_ctx = ralloc_parent(ir);
  155.  
  156.    for (i = 0; i < expr->get_num_operands(); i++) {
  157.       if (expr->operands[i]->type->is_vector()) {
  158.          found_vector = true;
  159.          vector_elements = expr->operands[i]->type->vector_elements;
  160.          break;
  161.       }
  162.    }
  163.    if (!found_vector)
  164.       return visit_continue;
  165.  
  166.    switch (expr->operation) {
  167.       case ir_unop_interpolate_at_centroid:
  168.       case ir_binop_interpolate_at_offset:
  169.       case ir_binop_interpolate_at_sample:
  170.          return visit_continue;
  171.  
  172.       default:
  173.          break;
  174.    }
  175.  
  176.    /* Store the expression operands in temps so we can use them
  177.     * multiple times.
  178.     */
  179.    for (i = 0; i < expr->get_num_operands(); i++) {
  180.       ir_assignment *assign;
  181.       ir_dereference *deref;
  182.  
  183.       assert(!expr->operands[i]->type->is_matrix());
  184.  
  185.       op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
  186.                                            "channel_expressions",
  187.                                            ir_var_temporary);
  188.       ir->insert_before(op_var[i]);
  189.  
  190.       deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
  191.       assign = new(mem_ctx) ir_assignment(deref,
  192.                                           expr->operands[i],
  193.                                           NULL);
  194.       ir->insert_before(assign);
  195.    }
  196.  
  197.    const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
  198.                                                            1, 1);
  199.  
  200.    /* OK, time to break down this vector operation. */
  201.    switch (expr->operation) {
  202.    case ir_unop_bit_not:
  203.    case ir_unop_logic_not:
  204.    case ir_unop_neg:
  205.    case ir_unop_abs:
  206.    case ir_unop_sign:
  207.    case ir_unop_rcp:
  208.    case ir_unop_rsq:
  209.    case ir_unop_sqrt:
  210.    case ir_unop_exp:
  211.    case ir_unop_log:
  212.    case ir_unop_exp2:
  213.    case ir_unop_log2:
  214.    case ir_unop_bitcast_i2f:
  215.    case ir_unop_bitcast_f2i:
  216.    case ir_unop_bitcast_f2u:
  217.    case ir_unop_bitcast_u2f:
  218.    case ir_unop_i2u:
  219.    case ir_unop_u2i:
  220.    case ir_unop_f2i:
  221.    case ir_unop_f2u:
  222.    case ir_unop_i2f:
  223.    case ir_unop_f2b:
  224.    case ir_unop_b2f:
  225.    case ir_unop_i2b:
  226.    case ir_unop_b2i:
  227.    case ir_unop_u2f:
  228.    case ir_unop_trunc:
  229.    case ir_unop_ceil:
  230.    case ir_unop_floor:
  231.    case ir_unop_fract:
  232.    case ir_unop_round_even:
  233.    case ir_unop_sin:
  234.    case ir_unop_cos:
  235.    case ir_unop_dFdx:
  236.    case ir_unop_dFdx_coarse:
  237.    case ir_unop_dFdx_fine:
  238.    case ir_unop_dFdy:
  239.    case ir_unop_dFdy_coarse:
  240.    case ir_unop_dFdy_fine:
  241.    case ir_unop_bitfield_reverse:
  242.    case ir_unop_bit_count:
  243.    case ir_unop_find_msb:
  244.    case ir_unop_find_lsb:
  245.    case ir_unop_saturate:
  246.       for (i = 0; i < vector_elements; i++) {
  247.          ir_rvalue *op0 = get_element(op_var[0], i);
  248.  
  249.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  250.                                                   element_type,
  251.                                                   op0,
  252.                                                   NULL));
  253.       }
  254.       break;
  255.  
  256.    case ir_binop_add:
  257.    case ir_binop_sub:
  258.    case ir_binop_mul:
  259.    case ir_binop_imul_high:
  260.    case ir_binop_div:
  261.    case ir_binop_carry:
  262.    case ir_binop_borrow:
  263.    case ir_binop_mod:
  264.    case ir_binop_min:
  265.    case ir_binop_max:
  266.    case ir_binop_pow:
  267.    case ir_binop_lshift:
  268.    case ir_binop_rshift:
  269.    case ir_binop_bit_and:
  270.    case ir_binop_bit_xor:
  271.    case ir_binop_bit_or:
  272.    case ir_binop_logic_and:
  273.    case ir_binop_logic_xor:
  274.    case ir_binop_logic_or:
  275.    case ir_binop_less:
  276.    case ir_binop_greater:
  277.    case ir_binop_lequal:
  278.    case ir_binop_gequal:
  279.    case ir_binop_equal:
  280.    case ir_binop_nequal:
  281.       for (i = 0; i < vector_elements; i++) {
  282.          ir_rvalue *op0 = get_element(op_var[0], i);
  283.          ir_rvalue *op1 = get_element(op_var[1], i);
  284.  
  285.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  286.                                                   element_type,
  287.                                                   op0,
  288.                                                   op1));
  289.       }
  290.       break;
  291.  
  292.    case ir_unop_any: {
  293.       ir_expression *temp;
  294.       temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
  295.                                         element_type,
  296.                                         get_element(op_var[0], 0),
  297.                                         get_element(op_var[0], 1));
  298.  
  299.       for (i = 2; i < vector_elements; i++) {
  300.          temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
  301.                                            element_type,
  302.                                            get_element(op_var[0], i),
  303.                                            temp);
  304.       }
  305.       assign(ir, 0, temp);
  306.       break;
  307.    }
  308.  
  309.    case ir_binop_dot: {
  310.       ir_expression *last = NULL;
  311.       for (i = 0; i < vector_elements; i++) {
  312.          ir_rvalue *op0 = get_element(op_var[0], i);
  313.          ir_rvalue *op1 = get_element(op_var[1], i);
  314.          ir_expression *temp;
  315.  
  316.          temp = new(mem_ctx) ir_expression(ir_binop_mul,
  317.                                            element_type,
  318.                                            op0,
  319.                                            op1);
  320.          if (last) {
  321.             last = new(mem_ctx) ir_expression(ir_binop_add,
  322.                                               element_type,
  323.                                               temp,
  324.                                               last);
  325.          } else {
  326.             last = temp;
  327.          }
  328.       }
  329.       assign(ir, 0, last);
  330.       break;
  331.    }
  332.  
  333.    case ir_binop_all_equal:
  334.    case ir_binop_any_nequal: {
  335.       ir_expression *last = NULL;
  336.       for (i = 0; i < vector_elements; i++) {
  337.          ir_rvalue *op0 = get_element(op_var[0], i);
  338.          ir_rvalue *op1 = get_element(op_var[1], i);
  339.          ir_expression *temp;
  340.          ir_expression_operation join;
  341.  
  342.          if (expr->operation == ir_binop_all_equal)
  343.             join = ir_binop_logic_and;
  344.          else
  345.             join = ir_binop_logic_or;
  346.  
  347.          temp = new(mem_ctx) ir_expression(expr->operation,
  348.                                            element_type,
  349.                                            op0,
  350.                                            op1);
  351.          if (last) {
  352.             last = new(mem_ctx) ir_expression(join,
  353.                                               element_type,
  354.                                               temp,
  355.                                               last);
  356.          } else {
  357.             last = temp;
  358.          }
  359.       }
  360.       assign(ir, 0, last);
  361.       break;
  362.    }
  363.    case ir_unop_noise:
  364.       unreachable("noise should have been broken down to function call");
  365.  
  366.    case ir_binop_bfm: {
  367.       /* Does not need to be scalarized, since its result will be identical
  368.        * for all channels.
  369.        */
  370.       ir_rvalue *op0 = get_element(op_var[0], 0);
  371.       ir_rvalue *op1 = get_element(op_var[1], 0);
  372.  
  373.       assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
  374.                                                element_type,
  375.                                                op0,
  376.                                                op1));
  377.       break;
  378.    }
  379.  
  380.    case ir_binop_ubo_load:
  381.       unreachable("not yet supported");
  382.  
  383.    case ir_triop_fma:
  384.    case ir_triop_lrp:
  385.    case ir_triop_csel:
  386.    case ir_triop_bitfield_extract:
  387.       for (i = 0; i < vector_elements; i++) {
  388.          ir_rvalue *op0 = get_element(op_var[0], i);
  389.          ir_rvalue *op1 = get_element(op_var[1], i);
  390.          ir_rvalue *op2 = get_element(op_var[2], i);
  391.  
  392.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  393.                                                   element_type,
  394.                                                   op0,
  395.                                                   op1,
  396.                                                   op2));
  397.       }
  398.       break;
  399.  
  400.    case ir_triop_bfi: {
  401.       /* Only a single BFM is needed for multiple BFIs. */
  402.       ir_rvalue *op0 = get_element(op_var[0], 0);
  403.  
  404.       for (i = 0; i < vector_elements; i++) {
  405.          ir_rvalue *op1 = get_element(op_var[1], i);
  406.          ir_rvalue *op2 = get_element(op_var[2], i);
  407.  
  408.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  409.                                                   element_type,
  410.                                                   op0->clone(mem_ctx, NULL),
  411.                                                   op1,
  412.                                                   op2));
  413.       }
  414.       break;
  415.    }
  416.  
  417.    case ir_unop_pack_snorm_2x16:
  418.    case ir_unop_pack_snorm_4x8:
  419.    case ir_unop_pack_unorm_2x16:
  420.    case ir_unop_pack_unorm_4x8:
  421.    case ir_unop_pack_half_2x16:
  422.    case ir_unop_unpack_snorm_2x16:
  423.    case ir_unop_unpack_snorm_4x8:
  424.    case ir_unop_unpack_unorm_2x16:
  425.    case ir_unop_unpack_unorm_4x8:
  426.    case ir_unop_unpack_half_2x16:
  427.    case ir_binop_ldexp:
  428.    case ir_binop_vector_extract:
  429.    case ir_triop_vector_insert:
  430.    case ir_quadop_bitfield_insert:
  431.    case ir_quadop_vector:
  432.       unreachable("should have been lowered");
  433.  
  434.    case ir_unop_unpack_half_2x16_split_x:
  435.    case ir_unop_unpack_half_2x16_split_y:
  436.    case ir_binop_pack_half_2x16_split:
  437.    case ir_unop_interpolate_at_centroid:
  438.    case ir_binop_interpolate_at_offset:
  439.    case ir_binop_interpolate_at_sample:
  440.       unreachable("not reached: expression operates on scalars only");
  441.  
  442.    case ir_unop_pack_double_2x32:
  443.    case ir_unop_unpack_double_2x32:
  444.    case ir_unop_frexp_sig:
  445.    case ir_unop_frexp_exp:
  446.    case ir_unop_d2f:
  447.    case ir_unop_f2d:
  448.    case ir_unop_d2i:
  449.    case ir_unop_i2d:
  450.    case ir_unop_d2u:
  451.    case ir_unop_u2d:
  452.    case ir_unop_d2b:
  453.       unreachable("no fp64 support yet");
  454.    }
  455.  
  456.    ir->remove();
  457.    this->progress = true;
  458.  
  459.    return visit_continue;
  460. }
  461.