Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright © 2010 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file lower_instructions.cpp
  26.  *
  27.  * Many GPUs lack native instructions for certain expression operations, and
  28.  * must replace them with some other expression tree.  This pass lowers some
  29.  * of the most common cases, allowing the lowering code to be implemented once
  30.  * rather than in each driver backend.
  31.  *
  32.  * Currently supported transformations:
  33.  * - SUB_TO_ADD_NEG
  34.  * - DIV_TO_MUL_RCP
  35.  * - EXP_TO_EXP2
  36.  * - POW_TO_EXP2
  37.  * - LOG_TO_LOG2
  38.  * - MOD_TO_FRACT
  39.  *
  40.  * SUB_TO_ADD_NEG:
  41.  * ---------------
  42.  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
  43.  *
  44.  * This simplifies expression reassociation, and for many backends
  45.  * there is no subtract operation separate from adding the negation.
  46.  * For backends with native subtract operations, they will probably
  47.  * want to recognize add(op0, neg(op1)) or the other way around to
  48.  * produce a subtract anyway.
  49.  *
  50.  * DIV_TO_MUL_RCP:
  51.  * ---------------
  52.  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  53.  *
  54.  * Many GPUs don't have a divide instruction (945 and 965 included),
  55.  * but they do have an RCP instruction to compute an approximate
  56.  * reciprocal.  By breaking the operation down, constant reciprocals
  57.  * can get constant folded.
  58.  *
  59.  * EXP_TO_EXP2 and LOG_TO_LOG2:
  60.  * ----------------------------
  61.  * Many GPUs don't have a base e log or exponent instruction, but they
  62.  * do have base 2 versions, so this pass converts exp and log to exp2
  63.  * and log2 operations.
  64.  *
  65.  * POW_TO_EXP2:
  66.  * -----------
  67.  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  68.  * x**y to 2**(y * log2(x)).
  69.  *
  70.  * MOD_TO_FRACT:
  71.  * -------------
  72.  * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
  73.  *
  74.  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  75.  * if we have to break it down like this anyway, it gives an
  76.  * opportunity to do things like constant fold the (1.0 / op1) easily.
  77.  */
  78.  
  79. #include "main/core.h" /* for M_LOG2E */
  80. #include "glsl_types.h"
  81. #include "ir.h"
  82. #include "ir_optimization.h"
  83.  
  84. class lower_instructions_visitor : public ir_hierarchical_visitor {
  85. public:
  86.    lower_instructions_visitor(unsigned lower)
  87.       : progress(false), lower(lower) { }
  88.  
  89.    ir_visitor_status visit_leave(ir_expression *);
  90.  
  91.    bool progress;
  92.  
  93. private:
  94.    unsigned lower; /** Bitfield of which operations to lower */
  95.  
  96.    void sub_to_add_neg(ir_expression *);
  97.    void div_to_mul_rcp(ir_expression *);
  98.    void mod_to_fract(ir_expression *);
  99.    void exp_to_exp2(ir_expression *);
  100.    void pow_to_exp2(ir_expression *);
  101.    void log_to_log2(ir_expression *);
  102. };
  103.  
  104. /**
  105.  * Determine if a particular type of lowering should occur
  106.  */
  107. #define lowering(x) (this->lower & x)
  108.  
  109. bool
  110. lower_instructions(exec_list *instructions, unsigned what_to_lower)
  111. {
  112.    lower_instructions_visitor v(what_to_lower);
  113.  
  114.    visit_list_elements(&v, instructions);
  115.    return v.progress;
  116. }
  117.  
  118. void
  119. lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
  120. {
  121.    ir->operation = ir_binop_add;
  122.    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
  123.                                            ir->operands[1], NULL);
  124.    this->progress = true;
  125. }
  126.  
  127. void
  128. lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
  129. {
  130.    if (!ir->operands[1]->type->is_integer()) {
  131.       /* New expression for the 1.0 / op1 */
  132.       ir_rvalue *expr;
  133.       expr = new(ir) ir_expression(ir_unop_rcp,
  134.                                    ir->operands[1]->type,
  135.                                    ir->operands[1],
  136.                                    NULL);
  137.  
  138.       /* op0 / op1 -> op0 * (1.0 / op1) */
  139.       ir->operation = ir_binop_mul;
  140.       ir->operands[1] = expr;
  141.    } else {
  142.       /* Be careful with integer division -- we need to do it as a
  143.        * float and re-truncate, since rcp(n > 1) of an integer would
  144.        * just be 0.
  145.        */
  146.       ir_rvalue *op0, *op1;
  147.       const struct glsl_type *vec_type;
  148.  
  149.       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
  150.                                          ir->operands[1]->type->vector_elements,
  151.                                          ir->operands[1]->type->matrix_columns);
  152.  
  153.       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
  154.          op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
  155.       else
  156.          op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
  157.  
  158.       op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
  159.  
  160.       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
  161.                                          ir->operands[0]->type->vector_elements,
  162.                                          ir->operands[0]->type->matrix_columns);
  163.  
  164.       if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
  165.          op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
  166.       else
  167.          op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
  168.  
  169.       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
  170.  
  171.       ir->operation = ir_unop_f2i;
  172.       ir->operands[0] = op0;
  173.       ir->operands[1] = NULL;
  174.    }
  175.  
  176.    this->progress = true;
  177. }
  178.  
  179. void
  180. lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
  181. {
  182.    ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
  183.  
  184.    ir->operation = ir_unop_exp2;
  185.    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
  186.                                            ir->operands[0], log2_e);
  187.    this->progress = true;
  188. }
  189.  
  190. void
  191. lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
  192. {
  193.    ir_expression *const log2_x =
  194.       new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
  195.                             ir->operands[0]);
  196.  
  197.    ir->operation = ir_unop_exp2;
  198.    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
  199.                                            ir->operands[1], log2_x);
  200.    ir->operands[1] = NULL;
  201.    this->progress = true;
  202. }
  203.  
  204. void
  205. lower_instructions_visitor::log_to_log2(ir_expression *ir)
  206. {
  207.    ir->operation = ir_binop_mul;
  208.    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
  209.                                            ir->operands[0], NULL);
  210.    ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
  211.    this->progress = true;
  212. }
  213.  
  214. void
  215. lower_instructions_visitor::mod_to_fract(ir_expression *ir)
  216. {
  217.    ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
  218.                                            ir_var_temporary);
  219.    this->base_ir->insert_before(temp);
  220.  
  221.    ir_assignment *const assign =
  222.       new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
  223.                             ir->operands[1], NULL);
  224.  
  225.    this->base_ir->insert_before(assign);
  226.  
  227.    ir_expression *const div_expr =
  228.       new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
  229.                             ir->operands[0],
  230.                             new(ir) ir_dereference_variable(temp));
  231.  
  232.    /* Don't generate new IR that would need to be lowered in an additional
  233.     * pass.
  234.     */
  235.    if (lowering(DIV_TO_MUL_RCP))
  236.       div_to_mul_rcp(div_expr);
  237.  
  238.    ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
  239.                                            ir->operands[0]->type,
  240.                                            div_expr,
  241.                                            NULL);
  242.  
  243.    ir->operation = ir_binop_mul;
  244.    ir->operands[0] = new(ir) ir_dereference_variable(temp);
  245.    ir->operands[1] = expr;
  246.    this->progress = true;
  247. }
  248.  
  249. ir_visitor_status
  250. lower_instructions_visitor::visit_leave(ir_expression *ir)
  251. {
  252.    switch (ir->operation) {
  253.    case ir_binop_sub:
  254.       if (lowering(SUB_TO_ADD_NEG))
  255.          sub_to_add_neg(ir);
  256.       break;
  257.  
  258.    case ir_binop_div:
  259.       if (lowering(DIV_TO_MUL_RCP))
  260.          div_to_mul_rcp(ir);
  261.       break;
  262.  
  263.    case ir_unop_exp:
  264.       if (lowering(EXP_TO_EXP2))
  265.          exp_to_exp2(ir);
  266.       break;
  267.  
  268.    case ir_unop_log:
  269.       if (lowering(LOG_TO_LOG2))
  270.          log_to_log2(ir);
  271.       break;
  272.  
  273.    case ir_binop_mod:
  274.       if (lowering(MOD_TO_FRACT))
  275.          mod_to_fract(ir);
  276.       break;
  277.  
  278.    case ir_binop_pow:
  279.       if (lowering(POW_TO_EXP2))
  280.          pow_to_exp2(ir);
  281.       break;
  282.  
  283.    default:
  284.       return visit_continue;
  285.    }
  286.  
  287.    return visit_continue;
  288. }
  289.