Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2013 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file opt_vectorize.cpp
  26.  *
  27.  * Combines scalar assignments of the same expression (modulo swizzle) to
  28.  * multiple channels of the same variable into a single vectorized expression
  29.  * and assignment.
  30.  *
  31.  * Many generated shaders contain scalarized code. That is, they contain
  32.  *
  33.  * r1.x = log2(v0.x);
  34.  * r1.y = log2(v0.y);
  35.  * r1.z = log2(v0.z);
  36.  *
  37.  * rather than
  38.  *
  39.  * r1.xyz = log2(v0.xyz);
  40.  *
  41.  * We look for consecutive assignments of the same expression (modulo swizzle)
  42.  * to each channel of the same variable.
  43.  *
  44.  * For instance, we want to convert these three scalar operations
  45.  *
  46.  * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
  47.  * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
  48.  * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
  49.  *
  50.  * into a single vector operation
  51.  *
  52.  * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
  53.  */
  54.  
  55. #include "ir.h"
  56. #include "ir_visitor.h"
  57. #include "ir_optimization.h"
  58. #include "glsl_types.h"
  59. #include "program/prog_instruction.h"
  60.  
  61. namespace {
  62.  
  63. class ir_vectorize_visitor : public ir_hierarchical_visitor {
  64. public:
  65.    void clear()
  66.    {
  67.       assignment[0] = NULL;
  68.       assignment[1] = NULL;
  69.       assignment[2] = NULL;
  70.       assignment[3] = NULL;
  71.       current_assignment = NULL;
  72.       last_assignment = NULL;
  73.       channels = 0;
  74.       has_swizzle = false;
  75.    }
  76.  
  77.    ir_vectorize_visitor()
  78.    {
  79.       clear();
  80.       progress = false;
  81.    }
  82.  
  83.    virtual ir_visitor_status visit_enter(ir_assignment *);
  84.    virtual ir_visitor_status visit_enter(ir_swizzle *);
  85.    virtual ir_visitor_status visit_enter(ir_dereference_array *);
  86.    virtual ir_visitor_status visit_enter(ir_expression *);
  87.    virtual ir_visitor_status visit_enter(ir_if *);
  88.    virtual ir_visitor_status visit_enter(ir_loop *);
  89.    virtual ir_visitor_status visit_enter(ir_texture *);
  90.  
  91.    virtual ir_visitor_status visit_leave(ir_assignment *);
  92.  
  93.    void try_vectorize();
  94.  
  95.    ir_assignment *assignment[4];
  96.    ir_assignment *current_assignment, *last_assignment;
  97.    unsigned channels;
  98.    bool has_swizzle;
  99.  
  100.    bool progress;
  101. };
  102.  
  103. } /* unnamed namespace */
  104.  
  105. /**
  106.  * Rewrites the swizzles and types of a right-hand side of an assignment.
  107.  *
  108.  * From the example above, this function would be called (by visit_tree()) on
  109.  * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
  110.  * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
  111.  *
  112.  * The function operates on ir_expressions (and its operands) and ir_swizzles.
  113.  * For expressions it sets a new type and swizzles any non-expression and non-
  114.  * swizzle scalar operands into appropriately sized vector arguments. For
  115.  * example, if combining
  116.  *
  117.  * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
  118.  * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
  119.  *
  120.  * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
  121.  * (var_ref v1) such that the final result was
  122.  *
  123.  * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
  124.  *                                              (swiz xx (var_ref v1))))
  125.  *
  126.  * For swizzles, it sets a new type, and if the variable being swizzled is a
  127.  * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
  128.  * data parameter. If the swizzled variable is scalar, then the swizzle was
  129.  * added by an earlier call to rewrite_swizzle() on an expression, so the
  130.  * mask should not be modified.
  131.  */
  132. static void
  133. rewrite_swizzle(ir_instruction *ir, void *data)
  134. {
  135.    ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
  136.  
  137.    switch (ir->ir_type) {
  138.    case ir_type_swizzle: {
  139.       ir_swizzle *swz = (ir_swizzle *)ir;
  140.       if (swz->val->type->is_vector()) {
  141.          swz->mask = *mask;
  142.       }
  143.       swz->type = glsl_type::get_instance(swz->type->base_type,
  144.                                           mask->num_components, 1);
  145.       break;
  146.    }
  147.    case ir_type_expression: {
  148.       ir_expression *expr = (ir_expression *)ir;
  149.       expr->type = glsl_type::get_instance(expr->type->base_type,
  150.                                            mask->num_components, 1);
  151.       for (unsigned i = 0; i < 4; i++) {
  152.          if (expr->operands[i]) {
  153.             ir_rvalue *rval = expr->operands[i]->as_rvalue();
  154.             if (rval && rval->type->is_scalar() &&
  155.                 !rval->as_expression() && !rval->as_swizzle()) {
  156.                expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
  157.                                                       mask->num_components);
  158.             }
  159.          }
  160.       }
  161.       break;
  162.    }
  163.    default:
  164.       break;
  165.    }
  166. }
  167.  
  168. /**
  169.  * Attempt to vectorize the previously saved assignments, and clear them from
  170.  * consideration.
  171.  *
  172.  * If the assignments are able to be combined, it modifies in-place the last
  173.  * assignment seen to be an equivalent vector form of the scalar assignments.
  174.  * It then removes the other now obsolete scalar assignments.
  175.  */
  176. void
  177. ir_vectorize_visitor::try_vectorize()
  178. {
  179.    if (this->last_assignment && this->channels > 1) {
  180.       ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
  181.  
  182.       this->last_assignment->write_mask = 0;
  183.  
  184.       for (unsigned i = 0, j = 0; i < 4; i++) {
  185.          if (this->assignment[i]) {
  186.             this->last_assignment->write_mask |= 1 << i;
  187.  
  188.             if (this->assignment[i] != this->last_assignment) {
  189.                this->assignment[i]->remove();
  190.             }
  191.  
  192.             switch (j) {
  193.             case 0: mask.x = i; break;
  194.             case 1: mask.y = i; break;
  195.             case 2: mask.z = i; break;
  196.             case 3: mask.w = i; break;
  197.             }
  198.  
  199.             j++;
  200.          }
  201.       }
  202.  
  203.       visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
  204.  
  205.       this->progress = true;
  206.    }
  207.    clear();
  208. }
  209.  
  /**
   * Tells whether exactly one bit of \p write_mask is set, i.e. whether the
   * mask selects a single channel. An empty mask yields false.
   */
  static bool
  single_channel_write_mask(unsigned write_mask)
  {
     if (write_mask == 0)
        return false;

     /* Clearing the lowest set bit leaves zero only if it was the only one. */
     const unsigned without_lowest_bit = write_mask & (write_mask - 1);
     return without_lowest_bit == 0;
  }
  218.  
  219. /**
  220.  * Translates single-channeled write mask to single-channeled swizzle.
  221.  */
  222. static unsigned
  223. write_mask_to_swizzle(unsigned write_mask)
  224. {
  225.    switch (write_mask) {
  226.    case WRITEMASK_X: return SWIZZLE_X;
  227.    case WRITEMASK_Y: return SWIZZLE_Y;
  228.    case WRITEMASK_Z: return SWIZZLE_Z;
  229.    case WRITEMASK_W: return SWIZZLE_W;
  230.    }
  231.    unreachable("not reached");
  232. }
  233.  
  234. /**
  235.  * Returns whether a single-channeled write mask matches a swizzle.
  236.  */
  237. static bool
  238. write_mask_matches_swizzle(unsigned write_mask,
  239.                            const ir_swizzle *swz)
  240. {
  241.    return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
  242.            (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
  243.            (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
  244.            (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
  245. }
  246.  
  247. /**
  248.  * Upon entering an ir_assignment, attempt to vectorize the currently tracked
  249.  * assignments if the current assignment is not suitable. Keep a pointer to
  250.  * the current assignment.
  251.  */
  252. ir_visitor_status
  253. ir_vectorize_visitor::visit_enter(ir_assignment *ir)
  254. {
  255.    ir_dereference *lhs = this->last_assignment != NULL ?
  256.                          this->last_assignment->lhs : NULL;
  257.    ir_rvalue *rhs = this->last_assignment != NULL ?
  258.                     this->last_assignment->rhs : NULL;
  259.  
  260.    if (ir->condition ||
  261.        this->channels >= 4 ||
  262.        !single_channel_write_mask(ir->write_mask) ||
  263.        this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
  264.        (lhs && !ir->lhs->equals(lhs)) ||
  265.        (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
  266.       try_vectorize();
  267.    }
  268.  
  269.    this->current_assignment = ir;
  270.  
  271.    return visit_continue;
  272. }
  273.  
  274. /**
  275.  * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
  276.  * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
  277.  * matches the current assignment's write mask.
  278.  *
  279.  * If the write mask doesn't match the swizzle mask, remove the current
  280.  * assignment from further consideration.
  281.  */
  282. ir_visitor_status
  283. ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
  284. {
  285.    if (this->current_assignment) {
  286.       if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
  287.          this->has_swizzle = true;
  288.       } else {
  289.          this->current_assignment = NULL;
  290.       }
  291.    }
  292.    return visit_continue;
  293. }
  294.  
  295. /* Upon entering an ir_array_dereference, remove the current assignment from
  296.  * further consideration. Since the index of an array dereference must scalar,
  297.  * we are not able to vectorize it.
  298.  *
  299.  * FINISHME: If all of scalar indices are identical we could vectorize.
  300.  */
  301. ir_visitor_status
  302. ir_vectorize_visitor::visit_enter(ir_dereference_array *)
  303. {
  304.    this->current_assignment = NULL;
  305.    return visit_continue_with_parent;
  306. }
  307.  
  308. /**
  309.  * Upon entering an ir_expression, remove the current assignment from further
  310.  * consideration if the expression operates horizontally on vectors.
  311.  */
  312. ir_visitor_status
  313. ir_vectorize_visitor::visit_enter(ir_expression *ir)
  314. {
  315.    if (ir->is_horizontal()) {
  316.       this->current_assignment = NULL;
  317.       return visit_continue_with_parent;
  318.    }
  319.    return visit_continue;
  320. }
  321.  
  322. /* Since there is no statement to visit between the "then" and "else"
  323.  * instructions try to vectorize before, in between, and after them to avoid
  324.  * combining statements from different basic blocks.
  325.  */
  326. ir_visitor_status
  327. ir_vectorize_visitor::visit_enter(ir_if *ir)
  328. {
  329.    try_vectorize();
  330.  
  331.    visit_list_elements(this, &ir->then_instructions);
  332.    try_vectorize();
  333.  
  334.    visit_list_elements(this, &ir->else_instructions);
  335.    try_vectorize();
  336.  
  337.    return visit_continue_with_parent;
  338. }
  339.  
  340. /* Since there is no statement to visit between the instructions in the body of
  341.  * the loop and the instructions after it try to vectorize before and after the
  342.  * body to avoid combining statements from different basic blocks.
  343.  */
  344. ir_visitor_status
  345. ir_vectorize_visitor::visit_enter(ir_loop *ir)
  346. {
  347.    try_vectorize();
  348.  
  349.    visit_list_elements(this, &ir->body_instructions);
  350.    try_vectorize();
  351.  
  352.    return visit_continue_with_parent;
  353. }
  354.  
  355. /**
  356.  * Upon entering an ir_texture, remove the current assignment from
  357.  * further consideration. Vectorizing multiple texture lookups into one
  358.  * is wrong.
  359.  */
  360. ir_visitor_status
  361. ir_vectorize_visitor::visit_enter(ir_texture *)
  362. {
  363.    this->current_assignment = NULL;
  364.    return visit_continue_with_parent;
  365. }
  366.  
  367. /**
  368.  * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
  369.  * the swizzle mask(s) found were appropriate. Also save a pointer in
  370.  * ::last_assignment so that we can compare future assignments with it.
  371.  *
  372.  * Finally, clear ::current_assignment and ::has_swizzle.
  373.  */
  374. ir_visitor_status
  375. ir_vectorize_visitor::visit_leave(ir_assignment *ir)
  376. {
  377.    if (this->has_swizzle && this->current_assignment) {
  378.       assert(this->current_assignment == ir);
  379.  
  380.       unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
  381.       this->assignment[channel] = ir;
  382.       this->channels++;
  383.  
  384.       this->last_assignment = this->current_assignment;
  385.    }
  386.    this->current_assignment = NULL;
  387.    this->has_swizzle = false;
  388.    return visit_continue;
  389. }
  390.  
  391. /**
  392.  * Combines scalar assignments of the same expression (modulo swizzle) to
  393.  * multiple channels of the same variable into a single vectorized expression
  394.  * and assignment.
  395.  */
  396. bool
  397. do_vectorize(exec_list *instructions)
  398. {
  399.    ir_vectorize_visitor v;
  400.  
  401.    v.run(instructions);
  402.  
  403.    /* Try to vectorize the last assignments seen. */
  404.    v.try_vectorize();
  405.  
  406.    return v.progress;
  407. }
  408.