Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2013 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file opt_cse.cpp
  26.  *
  27.  * constant subexpression elimination at the GLSL IR level.
  28.  *
  29.  * Compare to brw_fs_cse.cpp for a more complete CSE implementation.  This one
  30.  * is generic and handles texture operations, but it's rather simple currently
  31.  * and doesn't support modification of variables in the available expressions
  32.  * list, so it can't do variables other than uniforms or shader inputs.
  33.  */
  34.  
  35. #include "ir.h"
  36. #include "ir_visitor.h"
  37. #include "ir_rvalue_visitor.h"
  38. #include "ir_basic_block.h"
  39. #include "ir_optimization.h"
  40. #include "ir_builder.h"
  41. #include "glsl_types.h"
  42.  
  43. using namespace ir_builder;
  44.  
  45. static bool debug = false;
  46.  
  47. namespace {
  48.  
  49. /**
  50.  * This is the record of an available expression for common subexpression
  51.  * elimination.
  52.  */
  53. class ae_entry : public exec_node
  54. {
  55. public:
  56.    ae_entry(ir_instruction *base_ir, ir_rvalue **val)
  57.       : val(val), base_ir(base_ir)
  58.    {
  59.       assert(val);
  60.       assert(*val);
  61.       assert(base_ir);
  62.  
  63.       var = NULL;
  64.    }
  65.  
  66.    void init(ir_instruction *base_ir, ir_rvalue **val)
  67.    {
  68.       this->val = val;
  69.       this->base_ir = base_ir;
  70.       this->var = NULL;
  71.  
  72.       assert(val);
  73.       assert(*val);
  74.       assert(base_ir);
  75.    }
  76.  
  77.    /**
  78.     * The pointer to the expression that we might be able to reuse
  79.     *
  80.     * Note the double pointer -- this is the place in the base_ir expression
  81.     * tree that we would rewrite to move the expression out to a new variable
  82.     * assignment.
  83.     */
  84.    ir_rvalue **val;
  85.  
  86.    /**
  87.     * Root instruction in the basic block where the expression appeared.
  88.     *
  89.     * This is used so that we can insert the new variable declaration into the
  90.     * instruction stream (since *val is just somewhere in base_ir's expression
  91.     * tree).
  92.     */
  93.    ir_instruction *base_ir;
  94.  
  95.    /**
  96.     * The variable that the expression has been stored in, if it's been CSEd
  97.     * once already.
  98.     */
  99.    ir_variable *var;
  100. };
  101.  
  102. class cse_visitor : public ir_rvalue_visitor {
  103. public:
  104.    cse_visitor(exec_list *validate_instructions)
  105.       : validate_instructions(validate_instructions)
  106.    {
  107.       progress = false;
  108.       mem_ctx = ralloc_context(NULL);
  109.       this->ae = new(mem_ctx) exec_list;
  110.    }
  111.    ~cse_visitor()
  112.    {
  113.       ralloc_free(mem_ctx);
  114.    }
  115.  
  116.    virtual ir_visitor_status visit_enter(ir_function_signature *ir);
  117.    virtual ir_visitor_status visit_enter(ir_loop *ir);
  118.    virtual ir_visitor_status visit_enter(ir_if *ir);
  119.    virtual ir_visitor_status visit_enter(ir_call *ir);
  120.    virtual void handle_rvalue(ir_rvalue **rvalue);
  121.  
  122.    bool progress;
  123.  
  124. private:
  125.    void *mem_ctx;
  126.  
  127.    ir_rvalue *try_cse(ir_rvalue *rvalue);
  128.    void add_to_ae(ir_rvalue **rvalue);
  129.  
  130.    /**
  131.     * Move all nodes from the ae list to the free list
  132.     */
  133.    void empty_ae_list();
  134.  
  135.    /**
  136.     * Get and initialize a new ae_entry
  137.     *
  138.     * This will either come from the free list or be freshly allocated.
  139.     */
  140.    ae_entry *get_ae_entry(ir_rvalue **rvalue);
  141.  
  142.    /** List of ae_entry: The available expressions to reuse */
  143.    exec_list *ae;
  144.  
  145.    /**
  146.     * The whole shader, so that we can validate_ir_tree in debug mode.
  147.     *
  148.     * This proved quite useful when trying to get the tree manipulation
  149.     * right.
  150.     */
  151.    exec_list *validate_instructions;
  152.  
  153.    /**
  154.     * List of available-for-use ae_entry objects.
  155.     */
  156.    exec_list free_ae_entries;
  157. };
  158.  
  159. /**
  160.  * Visitor to walk an expression tree to check that all variables referenced
  161.  * are constants.
  162.  */
  163. class is_cse_candidate_visitor : public ir_hierarchical_visitor
  164. {
  165. public:
  166.  
  167.    is_cse_candidate_visitor()
  168.       : ok(true)
  169.    {
  170.    }
  171.  
  172.    virtual ir_visitor_status visit(ir_dereference_variable *ir);
  173.  
  174.    bool ok;
  175. };
  176.  
  177.  
  178. class contains_rvalue_visitor : public ir_rvalue_visitor
  179. {
  180. public:
  181.  
  182.    contains_rvalue_visitor(ir_rvalue *val)
  183.       : val(val)
  184.    {
  185.       found = false;
  186.    }
  187.  
  188.    virtual void handle_rvalue(ir_rvalue **rvalue);
  189.  
  190.    bool found;
  191.  
  192. private:
  193.    ir_rvalue *val;
  194. };
  195.  
  196. } /* unnamed namespace */
  197.  
  198. static void
  199. dump_ae(exec_list *ae)
  200. {
  201.    int i = 0;
  202.  
  203.    printf("CSE: AE contents:\n");
  204.    foreach_in_list(ae_entry, entry, ae) {
  205.       printf("CSE:   AE %2d (%p): ", i, entry);
  206.       (*entry->val)->print();
  207.       printf("\n");
  208.  
  209.       if (entry->var)
  210.          printf("CSE:     in var %p:\n", entry->var);
  211.  
  212.       i++;
  213.    }
  214. }
  215.  
  216. ir_visitor_status
  217. is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
  218. {
  219.    /* Currently, since we don't handle kills of the ae based on variables
  220.     * getting assigned, we can only handle constant variables.
  221.     */
  222.    if (ir->var->data.read_only) {
  223.       return visit_continue;
  224.    } else {
  225.       if (debug)
  226.          printf("CSE: non-candidate: var %s is not read only\n", ir->var->name);
  227.       ok = false;
  228.       return visit_stop;
  229.    }
  230. }
  231.  
  232. void
  233. contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
  234. {
  235.    if (*rvalue == val)
  236.       found = true;
  237. }
  238.  
  239. static bool
  240. contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
  241. {
  242.    contains_rvalue_visitor v(needle);
  243.    haystack->accept(&v);
  244.    return v.found;
  245. }
  246.  
  247. static bool
  248. is_cse_candidate(ir_rvalue *ir)
  249. {
  250.    /* Our temporary variable assignment generation isn't ready to handle
  251.     * anything bigger than a vector.
  252.     */
  253.    if (!ir->type->is_vector() && !ir->type->is_scalar()) {
  254.       if (debug)
  255.          printf("CSE: non-candidate: not a vector/scalar\n");
  256.       return false;
  257.    }
  258.  
  259.    /* Only handle expressions and textures currently.  We may want to extend
  260.     * to variable-index array dereferences at some point.
  261.     */
  262.    switch (ir->ir_type) {
  263.    case ir_type_expression:
  264.    case ir_type_texture:
  265.       break;
  266.    default:
  267.       if (debug)
  268.          printf("CSE: non-candidate: not an expression/texture\n");
  269.       return false;
  270.    }
  271.  
  272.    is_cse_candidate_visitor v;
  273.  
  274.    ir->accept(&v);
  275.  
  276.    return v.ok;
  277. }
  278.  
  279. /**
  280.  * Tries to find and return a reference to a previous computation of a given
  281.  * expression.
  282.  *
  283.  * Walk the list of available expressions checking if any of them match the
  284.  * rvalue, and if so, move the previous copy of the expression to a temporary
  285.  * and return a reference of the temporary.
  286.  */
  287. ir_rvalue *
  288. cse_visitor::try_cse(ir_rvalue *rvalue)
  289. {
  290.    foreach_in_list(ae_entry, entry, ae) {
  291.       if (debug) {
  292.          printf("Comparing to AE %p: ", entry);
  293.          (*entry->val)->print();
  294.          printf("\n");
  295.       }
  296.  
  297.       if (!rvalue->equals(*entry->val))
  298.          continue;
  299.  
  300.       if (debug) {
  301.          printf("CSE: Replacing: ");
  302.          (*entry->val)->print();
  303.          printf("\n");
  304.          printf("CSE:      with: ");
  305.          rvalue->print();
  306.          printf("\n");
  307.       }
  308.  
  309.       if (!entry->var) {
  310.          ir_instruction *base_ir = entry->base_ir;
  311.  
  312.          ir_variable *var = new(rvalue) ir_variable(rvalue->type,
  313.                                                     "cse",
  314.                                                     ir_var_temporary);
  315.  
  316.          /* Write the previous expression result into a new variable. */
  317.          base_ir->insert_before(var);
  318.          ir_assignment *assignment = assign(var, *entry->val);
  319.          base_ir->insert_before(assignment);
  320.  
  321.          /* Replace the expression in the original tree with a deref of the
  322.           * variable, but keep tracking the expression for further reuse.
  323.           */
  324.          *entry->val = new(rvalue) ir_dereference_variable(var);
  325.          entry->val = &assignment->rhs;
  326.  
  327.          entry->var = var;
  328.  
  329.          /* Update the base_irs in the AE list.  We have to be sure that
  330.           * they're correct -- expressions from our base_ir that weren't moved
  331.           * need to stay in this base_ir (so that later consumption of them
  332.           * puts new variables between our new variable and our base_ir), but
  333.           * expressions from our base_ir that we *did* move need base_ir
  334.           * updated so that any further elimination from inside gets its new
  335.           * assignments put before our new assignment.
  336.           */
  337.          foreach_in_list(ae_entry, fixup_entry, ae) {
  338.             if (contains_rvalue(assignment->rhs, *fixup_entry->val))
  339.                fixup_entry->base_ir = assignment;
  340.          }
  341.  
  342.          if (debug)
  343.             dump_ae(ae);
  344.       }
  345.  
  346.       /* Replace the expression in our current tree with the variable. */
  347.       return new(rvalue) ir_dereference_variable(entry->var);
  348.    }
  349.  
  350.    return NULL;
  351. }
  352.  
  353. void
  354. cse_visitor::empty_ae_list()
  355. {
  356.    free_ae_entries.append_list(ae);
  357. }
  358.  
  359. ae_entry *
  360. cse_visitor::get_ae_entry(ir_rvalue **rvalue)
  361. {
  362.    ae_entry *entry = (ae_entry *) free_ae_entries.pop_head();
  363.    if (entry) {
  364.       entry->init(base_ir, rvalue);
  365.    } else {
  366.       entry = new(mem_ctx) ae_entry(base_ir, rvalue);
  367.    }
  368.  
  369.    return entry;
  370. }
  371.  
  372. /** Add the rvalue to the list of available expressions for CSE. */
  373. void
  374. cse_visitor::add_to_ae(ir_rvalue **rvalue)
  375. {
  376.    if (debug) {
  377.       printf("CSE: Add to AE: ");
  378.       (*rvalue)->print();
  379.       printf("\n");
  380.    }
  381.  
  382.    ae->push_tail(get_ae_entry(rvalue));
  383.  
  384.    if (debug)
  385.       dump_ae(ae);
  386. }
  387.  
  388. void
  389. cse_visitor::handle_rvalue(ir_rvalue **rvalue)
  390. {
  391.    if (!*rvalue)
  392.       return;
  393.  
  394.    if (debug) {
  395.       printf("CSE: handle_rvalue ");
  396.       (*rvalue)->print();
  397.       printf("\n");
  398.    }
  399.  
  400.    if (!is_cse_candidate(*rvalue))
  401.       return;
  402.  
  403.    ir_rvalue *new_rvalue = try_cse(*rvalue);
  404.    if (new_rvalue) {
  405.       *rvalue = new_rvalue;
  406.       progress = true;
  407.  
  408.       if (debug)
  409.          validate_ir_tree(validate_instructions);
  410.    } else {
  411.       add_to_ae(rvalue);
  412.    }
  413. }
  414.  
  415. ir_visitor_status
  416. cse_visitor::visit_enter(ir_if *ir)
  417. {
  418.    handle_rvalue(&ir->condition);
  419.  
  420.    empty_ae_list();
  421.    visit_list_elements(this, &ir->then_instructions);
  422.  
  423.    empty_ae_list();
  424.    visit_list_elements(this, &ir->else_instructions);
  425.  
  426.    empty_ae_list();
  427.    return visit_continue_with_parent;
  428. }
  429.  
  430. ir_visitor_status
  431. cse_visitor::visit_enter(ir_function_signature *ir)
  432. {
  433.    empty_ae_list();
  434.    visit_list_elements(this, &ir->body);
  435.  
  436.    empty_ae_list();
  437.    return visit_continue_with_parent;
  438. }
  439.  
  440. ir_visitor_status
  441. cse_visitor::visit_enter(ir_loop *ir)
  442. {
  443.    empty_ae_list();
  444.    visit_list_elements(this, &ir->body_instructions);
  445.  
  446.    empty_ae_list();
  447.    return visit_continue_with_parent;
  448. }
  449.  
  450. ir_visitor_status
  451. cse_visitor::visit_enter(ir_call *)
  452. {
  453.    /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
  454.     * pointer to the (ir_rvalue *) on the stack.  Since we save those pointers
  455.     * in the AE list, we can't let handle_rvalue get called.
  456.     */
  457.    return visit_continue_with_parent;
  458. }
  459.  
  460. /**
  461.  * Does a (uniform-value) constant subexpression elimination pass on the code
  462.  * present in the instruction stream.
  463.  */
  464. bool
  465. do_cse(exec_list *instructions)
  466. {
  467.    cse_visitor v(instructions);
  468.  
  469.    visit_list_elements(&v, instructions);
  470.  
  471.    return v.progress;
  472. }
  473.