Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright © 2010 Luca Barbieri
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file lower_variable_index_to_cond_assign.cpp
  26.  *
  27.  * Turns non-constant indexing into array types to a series of
  28.  * conditional moves of each element into a temporary.
  29.  *
  30.  * Pre-DX10 GPUs often don't have a native way to do this operation,
  31.  * and this works around that.
  32.  */
  33.  
  34. #include "ir.h"
  35. #include "ir_rvalue_visitor.h"
  36. #include "ir_optimization.h"
  37. #include "glsl_types.h"
  38. #include "main/macros.h"
  39.  
  40. struct assignment_generator
  41. {
  42.    ir_instruction* base_ir;
  43.    ir_rvalue* array;
  44.    bool is_write;
  45.    unsigned int write_mask;
  46.    ir_variable* var;
  47.  
  48.    assignment_generator()
  49.    {
  50.    }
  51.  
  52.    void generate(unsigned i, ir_rvalue* condition, exec_list *list) const
  53.    {
  54.       /* Just clone the rest of the deref chain when trying to get at the
  55.        * underlying variable.
  56.        */
  57.       void *mem_ctx = ralloc_parent(base_ir);
  58.       ir_dereference *element =
  59.          new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL),
  60.                                            new(mem_ctx) ir_constant(i));
  61.       ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
  62.  
  63.       ir_assignment *assignment;
  64.       if (is_write) {
  65.          assignment = new(mem_ctx) ir_assignment(element, variable, condition,
  66.                                                  write_mask);
  67.       } else {
  68.          assignment = new(mem_ctx) ir_assignment(variable, element, condition);
  69.       }
  70.  
  71.       list->push_tail(assignment);
  72.    }
  73. };
  74.  
  75. struct switch_generator
  76. {
  77.    /* make TFunction a template parameter if you need to use other generators */
  78.    typedef assignment_generator TFunction;
  79.    const TFunction& generator;
  80.  
  81.    ir_variable* index;
  82.    unsigned linear_sequence_max_length;
  83.    unsigned condition_components;
  84.  
  85.    void *mem_ctx;
  86.  
  87.    switch_generator(const TFunction& generator, ir_variable *index,
  88.                     unsigned linear_sequence_max_length,
  89.                     unsigned condition_components)
  90.       : generator(generator), index(index),
  91.         linear_sequence_max_length(linear_sequence_max_length),
  92.         condition_components(condition_components)
  93.    {
  94.       this->mem_ctx = ralloc_parent(index);
  95.    }
  96.  
  97.    void linear_sequence(unsigned begin, unsigned end, exec_list *list)
  98.    {
  99.       if (begin == end)
  100.          return;
  101.  
  102.       /* If the array access is a read, read the first element of this subregion
  103.        * unconditionally.  The remaining tests will possibly overwrite this
  104.        * value with one of the other array elements.
  105.        *
  106.        * This optimization cannot be done for writes because it will cause the
  107.        * first element of the subregion to be written possibly *in addition* to
  108.        * one of the other elements.
  109.        */
  110.       unsigned first;
  111.       if (!this->generator.is_write) {
  112.          this->generator.generate(begin, 0, list);
  113.          first = begin + 1;
  114.       } else {
  115.          first = begin;
  116.       }
  117.  
  118.       for (unsigned i = first; i < end; i += 4) {
  119.          const unsigned comps = MIN2(condition_components, end - i);
  120.  
  121.          ir_rvalue *broadcast_index =
  122.             new(this->mem_ctx) ir_dereference_variable(index);
  123.  
  124.          if (comps) {
  125.             const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false };
  126.             broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m);
  127.          }
  128.  
  129.          /* Compare the desired index value with the next block of four indices.
  130.           */
  131.          ir_constant_data test_indices_data;
  132.          memset(&test_indices_data, 0, sizeof(test_indices_data));
  133.          test_indices_data.i[0] = i;
  134.          test_indices_data.i[1] = i + 1;
  135.          test_indices_data.i[2] = i + 2;
  136.          test_indices_data.i[3] = i + 3;
  137.          ir_constant *const test_indices =
  138.             new(this->mem_ctx) ir_constant(broadcast_index->type,
  139.                                            &test_indices_data);
  140.  
  141.          ir_rvalue *const condition_val =
  142.             new(this->mem_ctx) ir_expression(ir_binop_equal,
  143.                                              &glsl_type::bool_type[comps - 1],
  144.                                              broadcast_index,
  145.                                              test_indices);
  146.  
  147.          ir_variable *const condition =
  148.             new(this->mem_ctx) ir_variable(condition_val->type,
  149.                                            "dereference_array_condition",
  150.                                            ir_var_temporary);
  151.          list->push_tail(condition);
  152.  
  153.          ir_rvalue *const cond_deref =
  154.             new(this->mem_ctx) ir_dereference_variable(condition);
  155.          list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref,
  156.                                                           condition_val, 0));
  157.  
  158.          if (comps == 1) {
  159.             ir_rvalue *const cond_deref =
  160.                new(this->mem_ctx) ir_dereference_variable(condition);
  161.  
  162.             this->generator.generate(i, cond_deref, list);
  163.          } else {
  164.             for (unsigned j = 0; j < comps; j++) {
  165.                ir_rvalue *const cond_deref =
  166.                   new(this->mem_ctx) ir_dereference_variable(condition);
  167.                ir_rvalue *const cond_swiz =
  168.                   new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1);
  169.  
  170.                this->generator.generate(i + j, cond_swiz, list);
  171.             }
  172.          }
  173.       }
  174.    }
  175.  
  176.    void bisect(unsigned begin, unsigned end, exec_list *list)
  177.    {
  178.       unsigned middle = (begin + end) >> 1;
  179.  
  180.       assert(index->type->is_integer());
  181.  
  182.       ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT)
  183.          ? new(this->mem_ctx) ir_constant((unsigned)middle)
  184.          : new(this->mem_ctx) ir_constant((int)middle);
  185.  
  186.  
  187.       ir_dereference_variable *deref =
  188.          new(this->mem_ctx) ir_dereference_variable(this->index);
  189.  
  190.       ir_expression *less =
  191.          new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type,
  192.                                           deref, middle_c);
  193.  
  194.       ir_if *if_less = new(this->mem_ctx) ir_if(less);
  195.  
  196.       generate(begin, middle, &if_less->then_instructions);
  197.       generate(middle, end, &if_less->else_instructions);
  198.  
  199.       list->push_tail(if_less);
  200.    }
  201.  
  202.    void generate(unsigned begin, unsigned end, exec_list *list)
  203.    {
  204.       unsigned length = end - begin;
  205.       if (length <= this->linear_sequence_max_length)
  206.          return linear_sequence(begin, end, list);
  207.       else
  208.          return bisect(begin, end, list);
  209.    }
  210. };
  211.  
  212. /**
  213.  * Visitor class for replacing expressions with ir_constant values.
  214.  */
  215.  
  216. class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
  217. public:
  218.    variable_index_to_cond_assign_visitor(bool lower_input,
  219.                                          bool lower_output,
  220.                                          bool lower_temp,
  221.                                          bool lower_uniform)
  222.    {
  223.       this->progress = false;
  224.       this->lower_inputs = lower_input;
  225.       this->lower_outputs = lower_output;
  226.       this->lower_temps = lower_temp;
  227.       this->lower_uniforms = lower_uniform;
  228.    }
  229.  
  230.    bool progress;
  231.    bool lower_inputs;
  232.    bool lower_outputs;
  233.    bool lower_temps;
  234.    bool lower_uniforms;
  235.  
  236.    bool is_array_or_matrix(const ir_instruction *ir) const
  237.    {
  238.       return (ir->type->is_array() || ir->type->is_matrix());
  239.    }
  240.  
  241.    bool needs_lowering(ir_dereference_array *deref) const
  242.    {
  243.       if (deref == NULL || deref->array_index->as_constant()
  244.           || !is_array_or_matrix(deref->array))
  245.          return false;
  246.  
  247.       if (deref->array->ir_type == ir_type_constant)
  248.          return this->lower_temps;
  249.  
  250.       const ir_variable *const var = deref->array->variable_referenced();
  251.       switch (var->mode) {
  252.       case ir_var_auto:
  253.       case ir_var_temporary:
  254.          return this->lower_temps;
  255.       case ir_var_uniform:
  256.          return this->lower_uniforms;
  257.       case ir_var_in:
  258.          return (var->location == -1) ? this->lower_temps : this->lower_inputs;
  259.       case ir_var_out:
  260.          return (var->location == -1) ? this->lower_temps : this->lower_outputs;
  261.       case ir_var_inout:
  262.          return this->lower_temps;
  263.       }
  264.  
  265.       assert(!"Should not get here.");
  266.       return false;
  267.    }
  268.  
  269.    ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
  270.                                           ir_assignment* orig_assign)
  271.    {
  272.       assert(is_array_or_matrix(orig_deref->array));
  273.  
  274.       const unsigned length = (orig_deref->array->type->is_array())
  275.          ? orig_deref->array->type->length
  276.          : orig_deref->array->type->matrix_columns;
  277.  
  278.       void *const mem_ctx = ralloc_parent(base_ir);
  279.  
  280.       /* Temporary storage for either the result of the dereference of
  281.        * the array, or the RHS that's being assigned into the
  282.        * dereference of the array.
  283.        */
  284.       ir_variable *var;
  285.  
  286.       if (orig_assign) {
  287.          var = new(mem_ctx) ir_variable(orig_assign->rhs->type,
  288.                                         "dereference_array_value",
  289.                                         ir_var_temporary);
  290.          base_ir->insert_before(var);
  291.  
  292.          ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var);
  293.          ir_assignment *assign = new(mem_ctx) ir_assignment(lhs,
  294.                                                             orig_assign->rhs,
  295.                                                             NULL);
  296.  
  297.          base_ir->insert_before(assign);
  298.       } else {
  299.          var = new(mem_ctx) ir_variable(orig_deref->type,
  300.                                         "dereference_array_value",
  301.                                         ir_var_temporary);
  302.          base_ir->insert_before(var);
  303.       }
  304.  
  305.       /* Store the index to a temporary to avoid reusing its tree. */
  306.       ir_variable *index =
  307.          new(mem_ctx) ir_variable(orig_deref->array_index->type,
  308.                                   "dereference_array_index", ir_var_temporary);
  309.       base_ir->insert_before(index);
  310.  
  311.       ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index);
  312.       ir_assignment *assign =
  313.          new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
  314.       base_ir->insert_before(assign);
  315.  
  316.       assignment_generator ag;
  317.       ag.array = orig_deref->array;
  318.       ag.base_ir = base_ir;
  319.       ag.var = var;
  320.       if (orig_assign) {
  321.          ag.is_write = true;
  322.          ag.write_mask = orig_assign->write_mask;
  323.       } else {
  324.          ag.is_write = false;
  325.       }
  326.  
  327.       switch_generator sg(ag, index, 4, 4);
  328.  
  329.       exec_list list;
  330.       sg.generate(0, length, &list);
  331.       base_ir->insert_before(&list);
  332.  
  333.       return var;
  334.    }
  335.  
  336.    virtual void handle_rvalue(ir_rvalue **pir)
  337.    {
  338.       if (!*pir)
  339.          return;
  340.  
  341.       ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
  342.       if (needs_lowering(orig_deref)) {
  343.          ir_variable* var = convert_dereference_array(orig_deref, 0);
  344.          assert(var);
  345.          *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
  346.          this->progress = true;
  347.       }
  348.    }
  349.  
  350.    ir_visitor_status
  351.    visit_leave(ir_assignment *ir)
  352.    {
  353.       ir_rvalue_visitor::visit_leave(ir);
  354.  
  355.       ir_dereference_array *orig_deref = ir->lhs->as_dereference_array();
  356.  
  357.       if (needs_lowering(orig_deref)) {
  358.          convert_dereference_array(orig_deref, ir);
  359.          ir->remove();
  360.          this->progress = true;
  361.       }
  362.  
  363.       return visit_continue;
  364.    }
  365. };
  366.  
  367. bool
  368. lower_variable_index_to_cond_assign(exec_list *instructions,
  369.                                     bool lower_input,
  370.                                     bool lower_output,
  371.                                     bool lower_temp,
  372.                                     bool lower_uniform)
  373. {
  374.    variable_index_to_cond_assign_visitor v(lower_input,
  375.                                            lower_output,
  376.                                            lower_temp,
  377.                                            lower_uniform);
  378.  
  379.    visit_list_elements(&v, instructions);
  380.  
  381.    return v.progress;
  382. }
  383.