Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2010 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file brw_wm_channel_expressions.cpp
  26.  *
  27.  * Breaks vector operations down into operations on each component.
  28.  *
  29.  * The 965 fragment shader receives 8 or 16 pixels at a time, so each
  30.  * channel of a vector is laid out as 1 or 2 8-float registers.  Each
  31.  * ALU operation operates on one of those channel registers.  As a
  32.  * result, there is no value to the 965 fragment shader in tracking
  33.  * "vector" expressions in the sense of GLSL fragment shaders, when
  34.  * doing a channel at a time may help in constant folding, algebraic
  35.  * simplification, and reducing the liveness of channel registers.
  36.  *
  37.  * The exception to the desire to break everything down to floats is
  38.  * texturing.  The texture sampler returns a writemasked
  39.  * 4/8-register sequence containing the texture values.  We don't want
  40.  * to dispatch to the sampler separately for each channel we need, so
  41.  * we do retain the vector types in that case.
  42.  */
  43.  
  44. extern "C" {
  45. #include "main/core.h"
  46. #include "brw_wm.h"
  47. }
  48. #include "glsl/ir.h"
  49. #include "glsl/ir_expression_flattening.h"
  50. #include "glsl/glsl_types.h"
  51.  
  52. class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
  53. public:
  54.    ir_channel_expressions_visitor()
  55.    {
  56.       this->progress = false;
  57.       this->mem_ctx = NULL;
  58.    }
  59.  
  60.    ir_visitor_status visit_leave(ir_assignment *);
  61.  
  62.    ir_rvalue *get_element(ir_variable *var, unsigned int element);
  63.    void assign(ir_assignment *ir, int elem, ir_rvalue *val);
  64.  
  65.    bool progress;
  66.    void *mem_ctx;
  67. };
  68.  
  69. static bool
  70. channel_expressions_predicate(ir_instruction *ir)
  71. {
  72.    ir_expression *expr = ir->as_expression();
  73.    unsigned int i;
  74.  
  75.    if (!expr)
  76.       return false;
  77.  
  78.    for (i = 0; i < expr->get_num_operands(); i++) {
  79.       if (expr->operands[i]->type->is_vector())
  80.          return true;
  81.    }
  82.  
  83.    return false;
  84. }
  85.  
  86. bool
  87. brw_do_channel_expressions(exec_list *instructions)
  88. {
  89.    ir_channel_expressions_visitor v;
  90.  
  91.    /* Pull out any matrix expression to a separate assignment to a
  92.     * temp.  This will make our handling of the breakdown to
  93.     * operations on the matrix's vector components much easier.
  94.     */
  95.    do_expression_flattening(instructions, channel_expressions_predicate);
  96.  
  97.    visit_list_elements(&v, instructions);
  98.  
  99.    return v.progress;
  100. }
  101.  
  102. ir_rvalue *
  103. ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
  104. {
  105.    ir_dereference *deref;
  106.  
  107.    if (var->type->is_scalar())
  108.       return new(mem_ctx) ir_dereference_variable(var);
  109.  
  110.    assert(elem < var->type->components());
  111.    deref = new(mem_ctx) ir_dereference_variable(var);
  112.    return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
  113. }
  114.  
  115. void
  116. ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
  117. {
  118.    ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
  119.    ir_assignment *assign;
  120.  
  121.    /* This assign-of-expression should have been generated by the
  122.     * expression flattening visitor (since we never short circit to
  123.     * not flatten, even for plain assignments of variables), so the
  124.     * writemask is always full.
  125.     */
  126.    assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
  127.  
  128.    assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
  129.    ir->insert_before(assign);
  130. }
  131.  
  132. ir_visitor_status
  133. ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
  134. {
  135.    ir_expression *expr = ir->rhs->as_expression();
  136.    bool found_vector = false;
  137.    unsigned int i, vector_elements = 1;
  138.    ir_variable *op_var[3];
  139.  
  140.    if (!expr)
  141.       return visit_continue;
  142.  
  143.    if (!this->mem_ctx)
  144.       this->mem_ctx = ralloc_parent(ir);
  145.  
  146.    for (i = 0; i < expr->get_num_operands(); i++) {
  147.       if (expr->operands[i]->type->is_vector()) {
  148.          found_vector = true;
  149.          vector_elements = expr->operands[i]->type->vector_elements;
  150.          break;
  151.       }
  152.    }
  153.    if (!found_vector)
  154.       return visit_continue;
  155.  
  156.    /* Store the expression operands in temps so we can use them
  157.     * multiple times.
  158.     */
  159.    for (i = 0; i < expr->get_num_operands(); i++) {
  160.       ir_assignment *assign;
  161.       ir_dereference *deref;
  162.  
  163.       assert(!expr->operands[i]->type->is_matrix());
  164.  
  165.       op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
  166.                                            "channel_expressions",
  167.                                            ir_var_temporary);
  168.       ir->insert_before(op_var[i]);
  169.  
  170.       deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
  171.       assign = new(mem_ctx) ir_assignment(deref,
  172.                                           expr->operands[i],
  173.                                           NULL);
  174.       ir->insert_before(assign);
  175.    }
  176.  
  177.    const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
  178.                                                            1, 1);
  179.  
  180.    /* OK, time to break down this vector operation. */
  181.    switch (expr->operation) {
  182.    case ir_unop_bit_not:
  183.    case ir_unop_logic_not:
  184.    case ir_unop_neg:
  185.    case ir_unop_abs:
  186.    case ir_unop_sign:
  187.    case ir_unop_rcp:
  188.    case ir_unop_rsq:
  189.    case ir_unop_sqrt:
  190.    case ir_unop_exp:
  191.    case ir_unop_log:
  192.    case ir_unop_exp2:
  193.    case ir_unop_log2:
  194.    case ir_unop_bitcast_i2f:
  195.    case ir_unop_bitcast_f2i:
  196.    case ir_unop_bitcast_f2u:
  197.    case ir_unop_bitcast_u2f:
  198.    case ir_unop_i2u:
  199.    case ir_unop_u2i:
  200.    case ir_unop_f2i:
  201.    case ir_unop_f2u:
  202.    case ir_unop_i2f:
  203.    case ir_unop_f2b:
  204.    case ir_unop_b2f:
  205.    case ir_unop_i2b:
  206.    case ir_unop_b2i:
  207.    case ir_unop_u2f:
  208.    case ir_unop_trunc:
  209.    case ir_unop_ceil:
  210.    case ir_unop_floor:
  211.    case ir_unop_fract:
  212.    case ir_unop_round_even:
  213.    case ir_unop_sin:
  214.    case ir_unop_cos:
  215.    case ir_unop_sin_reduced:
  216.    case ir_unop_cos_reduced:
  217.    case ir_unop_dFdx:
  218.    case ir_unop_dFdy:
  219.    case ir_unop_bitfield_reverse:
  220.    case ir_unop_bit_count:
  221.    case ir_unop_find_msb:
  222.    case ir_unop_find_lsb:
  223.       for (i = 0; i < vector_elements; i++) {
  224.          ir_rvalue *op0 = get_element(op_var[0], i);
  225.  
  226.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  227.                                                   element_type,
  228.                                                   op0,
  229.                                                   NULL));
  230.       }
  231.       break;
  232.  
  233.    case ir_binop_add:
  234.    case ir_binop_sub:
  235.    case ir_binop_mul:
  236.    case ir_binop_div:
  237.    case ir_binop_mod:
  238.    case ir_binop_min:
  239.    case ir_binop_max:
  240.    case ir_binop_pow:
  241.    case ir_binop_lshift:
  242.    case ir_binop_rshift:
  243.    case ir_binop_bit_and:
  244.    case ir_binop_bit_xor:
  245.    case ir_binop_bit_or:
  246.    case ir_binop_less:
  247.    case ir_binop_greater:
  248.    case ir_binop_lequal:
  249.    case ir_binop_gequal:
  250.    case ir_binop_equal:
  251.    case ir_binop_nequal:
  252.       for (i = 0; i < vector_elements; i++) {
  253.          ir_rvalue *op0 = get_element(op_var[0], i);
  254.          ir_rvalue *op1 = get_element(op_var[1], i);
  255.  
  256.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  257.                                                   element_type,
  258.                                                   op0,
  259.                                                   op1));
  260.       }
  261.       break;
  262.  
  263.    case ir_unop_any: {
  264.       ir_expression *temp;
  265.       temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
  266.                                         element_type,
  267.                                         get_element(op_var[0], 0),
  268.                                         get_element(op_var[0], 1));
  269.  
  270.       for (i = 2; i < vector_elements; i++) {
  271.          temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
  272.                                            element_type,
  273.                                            get_element(op_var[0], i),
  274.                                            temp);
  275.       }
  276.       assign(ir, 0, temp);
  277.       break;
  278.    }
  279.  
  280.    case ir_binop_dot: {
  281.       ir_expression *last = NULL;
  282.       for (i = 0; i < vector_elements; i++) {
  283.          ir_rvalue *op0 = get_element(op_var[0], i);
  284.          ir_rvalue *op1 = get_element(op_var[1], i);
  285.          ir_expression *temp;
  286.  
  287.          temp = new(mem_ctx) ir_expression(ir_binop_mul,
  288.                                            element_type,
  289.                                            op0,
  290.                                            op1);
  291.          if (last) {
  292.             last = new(mem_ctx) ir_expression(ir_binop_add,
  293.                                               element_type,
  294.                                               temp,
  295.                                               last);
  296.          } else {
  297.             last = temp;
  298.          }
  299.       }
  300.       assign(ir, 0, last);
  301.       break;
  302.    }
  303.  
  304.    case ir_binop_logic_and:
  305.    case ir_binop_logic_xor:
  306.    case ir_binop_logic_or:
  307.       ir->print();
  308.       printf("\n");
  309.       assert(!"not reached: expression operates on scalars only");
  310.       break;
  311.    case ir_binop_all_equal:
  312.    case ir_binop_any_nequal: {
  313.       ir_expression *last = NULL;
  314.       for (i = 0; i < vector_elements; i++) {
  315.          ir_rvalue *op0 = get_element(op_var[0], i);
  316.          ir_rvalue *op1 = get_element(op_var[1], i);
  317.          ir_expression *temp;
  318.          ir_expression_operation join;
  319.  
  320.          if (expr->operation == ir_binop_all_equal)
  321.             join = ir_binop_logic_and;
  322.          else
  323.             join = ir_binop_logic_or;
  324.  
  325.          temp = new(mem_ctx) ir_expression(expr->operation,
  326.                                            element_type,
  327.                                            op0,
  328.                                            op1);
  329.          if (last) {
  330.             last = new(mem_ctx) ir_expression(join,
  331.                                               element_type,
  332.                                               temp,
  333.                                               last);
  334.          } else {
  335.             last = temp;
  336.          }
  337.       }
  338.       assign(ir, 0, last);
  339.       break;
  340.    }
  341.    case ir_unop_noise:
  342.       assert(!"noise should have been broken down to function call");
  343.       break;
  344.  
  345.    case ir_binop_bfm: {
  346.       /* Does not need to be scalarized, since its result will be identical
  347.        * for all channels.
  348.        */
  349.       ir_rvalue *op0 = get_element(op_var[0], 0);
  350.       ir_rvalue *op1 = get_element(op_var[1], 0);
  351.  
  352.       assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
  353.                                                element_type,
  354.                                                op0,
  355.                                                op1));
  356.       break;
  357.    }
  358.  
  359.    case ir_binop_ubo_load:
  360.       assert(!"not yet supported");
  361.       break;
  362.  
  363.    case ir_triop_lrp:
  364.    case ir_triop_bitfield_extract:
  365.       for (i = 0; i < vector_elements; i++) {
  366.          ir_rvalue *op0 = get_element(op_var[0], i);
  367.          ir_rvalue *op1 = get_element(op_var[1], i);
  368.          ir_rvalue *op2 = get_element(op_var[2], i);
  369.  
  370.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  371.                                                   element_type,
  372.                                                   op0,
  373.                                                   op1,
  374.                                                   op2));
  375.       }
  376.       break;
  377.  
  378.    case ir_triop_bfi: {
  379.       /* Only a single BFM is needed for multiple BFIs. */
  380.       ir_rvalue *op0 = get_element(op_var[0], 0);
  381.  
  382.       for (i = 0; i < vector_elements; i++) {
  383.          ir_rvalue *op1 = get_element(op_var[1], i);
  384.          ir_rvalue *op2 = get_element(op_var[2], i);
  385.  
  386.          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
  387.                                                   element_type,
  388.                                                   op0->clone(mem_ctx, NULL),
  389.                                                   op1,
  390.                                                   op2));
  391.       }
  392.       break;
  393.    }
  394.  
  395.    case ir_unop_pack_snorm_2x16:
  396.    case ir_unop_pack_snorm_4x8:
  397.    case ir_unop_pack_unorm_2x16:
  398.    case ir_unop_pack_unorm_4x8:
  399.    case ir_unop_pack_half_2x16:
  400.    case ir_unop_unpack_snorm_2x16:
  401.    case ir_unop_unpack_snorm_4x8:
  402.    case ir_unop_unpack_unorm_2x16:
  403.    case ir_unop_unpack_unorm_4x8:
  404.    case ir_unop_unpack_half_2x16:
  405.    case ir_binop_vector_extract:
  406.    case ir_triop_vector_insert:
  407.    case ir_quadop_bitfield_insert:
  408.    case ir_quadop_vector:
  409.       assert(!"should have been lowered");
  410.       break;
  411.  
  412.    case ir_unop_unpack_half_2x16_split_x:
  413.    case ir_unop_unpack_half_2x16_split_y:
  414.    case ir_binop_pack_half_2x16_split:
  415.       assert("!not reached: expression operates on scalars only");
  416.       break;
  417.    }
  418.  
  419.    ir->remove();
  420.    this->progress = true;
  421.  
  422.    return visit_continue;
  423. }
  424.