/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_packed_varyings.cpp
 *
 * This lowering pass generates GLSL code that manually packs varyings into
 * vec4 slots, for the benefit of back-ends that don't support packed varyings
 * natively.
 *
 * For example, the following shader:
 *
 *   out mat3x2 foo;  // location=4, location_frac=0
 *   out vec3 bar[2]; // location=5, location_frac=2
 *
 *   main()
 *   {
 *     ...
 *   }
 *
 * Is rewritten to:
 *
 *   mat3x2 foo;
 *   vec3 bar[2];
 *   out vec4 packed4; // location=4, location_frac=0
 *   out vec4 packed5; // location=5, location_frac=0
 *   out vec4 packed6; // location=6, location_frac=0
 *
 *   main()
 *   {
 *     ...
 *     packed4.xy = foo[0];
 *     packed4.zw = foo[1];
 *     packed5.xy = foo[2];
 *     packed5.zw = bar[0].xy;
 *     packed6.x = bar[0].z;
 *     packed6.yzw = bar[1];
 *   }
 *
 * This lowering pass properly handles "double parking" of a varying vector
 * across two varying slots.  For example, in the code above, two of the
 * components of bar[0] are stored in packed5, and the remaining component is
 * stored in packed6.
 *
 * Note that in theory, the extra instructions may cause some loss of
 * performance.  However, hopefully in most cases the performance loss will
 * either be absorbed by a later optimization pass, or it will be offset by
 * memory bandwidth savings (because fewer varyings are used).
 *
 * This lowering pass also packs flat floats, ints, and uints together, by
 * using ivec4 as the base type of flat "varyings", and using appropriate
 * casts to convert floats and uints into ints.
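 *
 * For example (an illustrative sketch of the strategy, not literal output of
 * this pass), flat varyings declared as:
 *
 *   flat out int baz;
 *   flat out uint qux;
 *   flat out float quux;
 *
 * can all share a single "flat out ivec4 packedN", conceptually assigned as:
 *
 *   packedN.x = baz;
 *   packedN.y = int(qux);              // ir_unop_u2i
 *   packedN.z = floatBitsToInt(quux);  // ir_unop_bitcast_f2i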
 *
 * This lowering pass also handles varyings whose type is a struct or an array
 * of structs.  Structs are packed in order and with no gaps, so there may be a
 * performance penalty due to structure elements being double-parked.
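 *
 * For example, a struct varying declared as
 *
 *   out struct { vec2 a; vec3 b; } s;  // location=4, location_frac=0
 *
 * is packed with s.a in packed4.xy and s.b double-parked across packed4.zw
 * and packed5.x.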
 */

#include "glsl_symbol_table.h"
#include "ir.h"
#include "ir_optimization.h"

/**
 * Visitor that performs varying packing.  For each varying declared in the
 * shader, this visitor determines whether it needs to be packed.  If so, it
 * demotes it to an ordinary global, creates new packed varyings, and
 * generates assignments to convert between the original varying and the
 * packed varying.
 */
class lower_packed_varyings_visitor
{
public:
   lower_packed_varyings_visitor(void *mem_ctx, unsigned location_base,
                                 unsigned locations_used,
                                 ir_variable_mode mode,
                                 exec_list *main_instructions);

   void run(exec_list *instructions);

private:
   ir_assignment *bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs);
   ir_assignment *bitwise_assign_unpack(ir_rvalue *lhs, ir_rvalue *rhs);
   unsigned lower_rvalue(ir_rvalue *rvalue, unsigned fine_location,
                         ir_variable *unpacked_var, const char *name);
   unsigned lower_arraylike(ir_rvalue *rvalue, unsigned array_size,
                            unsigned fine_location,
                            ir_variable *unpacked_var, const char *name);
   ir_variable *get_packed_varying(unsigned location,
                                   ir_variable *unpacked_var,
                                   const char *name);
   bool needs_lowering(ir_variable *var);

   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void * const mem_ctx;

   /**
    * Location representing the first generic varying slot for this shader
    * stage (e.g. VARYING_SLOT_VAR0 if we are packing vertex shader outputs).
    * Varyings whose location is less than this value are assumed to
    * correspond to special fixed function hardware, so they are not lowered.
    */
   const unsigned location_base;

   /**
    * Number of generic varying slots which are used by this shader.  This is
    * used to allocate temporary intermediate data structures.  If any
    * varying used by this shader has a location greater than or equal to
    * location_base + locations_used, an assertion will fire.
    */
   const unsigned locations_used;

   /**
    * Array of pointers to the packed varyings that have been created for each
    * generic varying slot.  NULL entries in this array indicate varying slots
    * for which a packed varying has not been created yet.
    */
   ir_variable **packed_varyings;

   /**
    * Type of varying which is being lowered in this pass (either
    * ir_var_shader_in or ir_var_shader_out).
    */
   const ir_variable_mode mode;

   /**
    * List of instructions corresponding to the main() function.  This is
    * where we add instructions to pack or unpack the varyings.
    */
   exec_list *main_instructions;
};

lower_packed_varyings_visitor::lower_packed_varyings_visitor(
      void *mem_ctx, unsigned location_base, unsigned locations_used,
      ir_variable_mode mode, exec_list *main_instructions)
   : mem_ctx(mem_ctx),
     location_base(location_base),
     locations_used(locations_used),
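     /* rzalloc zero-fills the allocation, so every slot starts out NULL,
      * i.e. "no packed varying created yet". */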
     packed_varyings((ir_variable **)
                     rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
                                        locations_used)),
     mode(mode),
     main_instructions(main_instructions)
{
}

void
lower_packed_varyings_visitor::run(exec_list *instructions)
{
   foreach_list (node, instructions) {
      ir_variable *var = ((ir_instruction *) node)->as_variable();
      if (var == NULL)
         continue;

      if (var->mode != this->mode ||
          var->location < (int) this->location_base ||
          !this->needs_lowering(var))
         continue;

      /* This lowering pass is only capable of packing floats and ints
       * together when their interpolation mode is "flat".  Therefore, to be
       * safe, caller should ensure that integral varyings always use flat
       * interpolation, even when this is not required by GLSL.
       */
      assert(var->interpolation == INTERP_QUALIFIER_FLAT ||
             !var->type->contains_integer());

      /* Change the old varying into an ordinary global. */
      var->mode = ir_var_auto;

      /* Create a reference to the old varying. */
      ir_dereference_variable *deref
         = new(this->mem_ctx) ir_dereference_variable(var);

      /* Recursively pack or unpack it. */
      this->lower_rvalue(deref, var->location * 4 + var->location_frac, var,
                         var->name);
   }
}


/**
 * Make an ir_assignment from \c rhs to \c lhs, performing appropriate
 * bitcasts if necessary to match up types.
 *
 * This function is called when packing varyings.
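 *
 * E.g. packing a flat uint varying u into an ivec4 component yields
 * packed.x = int(u) (via ir_unop_u2i); packing a flat float is a
 * bit-for-bit copy via ir_unop_bitcast_f2i.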
 */
ir_assignment *
lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs,
                                                   ir_rvalue *rhs)
{
   if (lhs->type->base_type != rhs->type->base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from (uint
       * or float) to int.
       */
      assert(lhs->type->base_type == GLSL_TYPE_INT);
      switch (rhs->type->base_type) {
      case GLSL_TYPE_UINT:
         rhs = new(this->mem_ctx)
            ir_expression(ir_unop_u2i, lhs->type, rhs);
         break;
      case GLSL_TYPE_FLOAT:
         rhs = new(this->mem_ctx)
            ir_expression(ir_unop_bitcast_f2i, lhs->type, rhs);
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }
   return new(this->mem_ctx) ir_assignment(lhs, rhs);
}


/**
 * Make an ir_assignment from \c rhs to \c lhs, performing appropriate
 * bitcasts if necessary to match up types.
 *
 * This function is called when unpacking varyings.
 */
ir_assignment *
lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs,
                                                     ir_rvalue *rhs)
{
   if (lhs->type->base_type != rhs->type->base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from int to
       * (uint or float).
       */
      assert(rhs->type->base_type == GLSL_TYPE_INT);
      switch (lhs->type->base_type) {
      case GLSL_TYPE_UINT:
         rhs = new(this->mem_ctx)
            ir_expression(ir_unop_i2u, lhs->type, rhs);
         break;
      case GLSL_TYPE_FLOAT:
         rhs = new(this->mem_ctx)
            ir_expression(ir_unop_bitcast_i2f, lhs->type, rhs);
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }
   return new(this->mem_ctx) ir_assignment(lhs, rhs);
}


/**
 * Recursively pack or unpack the given varying (or portion of a varying) by
 * traversing all of its constituent vectors.
 *
 * \param fine_location is the location where the first constituent vector
 * should be packed--the word "fine" indicates that this location is expressed
 * in multiples of a float, rather than multiples of a vec4 as is used
 * elsewhere in Mesa.
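 * For example, run() seeds this with var->location * 4 + var->location_frac,
 * so a varying at location 5 with location_frac 2 starts at fine_location
 * 5 * 4 + 2 = 22.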
 *
 * \return the location where the next constituent vector (after this one)
 * should be packed.
 */
unsigned
lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
                                            unsigned fine_location,
                                            ir_variable *unpacked_var,
                                            const char *name)
{
   if (rvalue->type->is_record()) {
      for (unsigned i = 0; i < rvalue->type->length; i++) {
         if (i != 0)
            rvalue = rvalue->clone(this->mem_ctx, NULL);
         const char *field_name = rvalue->type->fields.structure[i].name;
         ir_dereference_record *dereference_record = new(this->mem_ctx)
            ir_dereference_record(rvalue, field_name);
         char *deref_name
            = ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name);
         fine_location = this->lower_rvalue(dereference_record, fine_location,
                                            unpacked_var, deref_name);
      }
      return fine_location;
   } else if (rvalue->type->is_array()) {
      /* Arrays are packed/unpacked by considering each array element in
       * sequence.
       */
      return this->lower_arraylike(rvalue, rvalue->type->array_size(),
                                   fine_location, unpacked_var, name);
   } else if (rvalue->type->is_matrix()) {
      /* Matrices are packed/unpacked by considering each column vector in
       * sequence.
       */
      return this->lower_arraylike(rvalue, rvalue->type->matrix_columns,
                                   fine_location, unpacked_var, name);
   } else if (rvalue->type->vector_elements + fine_location % 4 > 4) {
      /* This vector is going to be "double parked" across two varying slots,
       * so handle it as two separate assignments.
       */
      unsigned left_components = 4 - fine_location % 4;
      unsigned right_components
         = rvalue->type->vector_elements - left_components;
      unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
      char left_swizzle_name[4] = { 0, 0, 0, 0 };
      char right_swizzle_name[4] = { 0, 0, 0, 0 };
      for (unsigned i = 0; i < left_components; i++) {
         left_swizzle_values[i] = i;
         left_swizzle_name[i] = "xyzw"[i];
      }
      for (unsigned i = 0; i < right_components; i++) {
         right_swizzle_values[i] = i + left_components;
         right_swizzle_name[i] = "xyzw"[i + left_components];
      }
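      /* E.g. for a vec3 starting at fine_location % 4 == 2 (like bar[0] in
       * the header comment), the left swizzle selects .xy and the right
       * swizzle selects .z, so the two halves land in adjacent slots. */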
      ir_swizzle *left_swizzle = new(this->mem_ctx)
         ir_swizzle(rvalue, left_swizzle_values, left_components);
      ir_swizzle *right_swizzle = new(this->mem_ctx)
         ir_swizzle(rvalue->clone(this->mem_ctx, NULL), right_swizzle_values,
                    right_components);
      char *left_name
         = ralloc_asprintf(this->mem_ctx, "%s.%s", name, left_swizzle_name);
      char *right_name
         = ralloc_asprintf(this->mem_ctx, "%s.%s", name, right_swizzle_name);
      fine_location = this->lower_rvalue(left_swizzle, fine_location,
                                         unpacked_var, left_name);
      return this->lower_rvalue(right_swizzle, fine_location, unpacked_var,
                                right_name);
   } else {
      /* No special handling is necessary; pack the rvalue into the
       * varying.
       */
      unsigned swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned components = rvalue->type->vector_elements;
      unsigned location = fine_location / 4;
      unsigned location_frac = fine_location % 4;
      for (unsigned i = 0; i < components; ++i)
         swizzle_values[i] = i + location_frac;
      ir_dereference_variable *packed_deref = new(this->mem_ctx)
         ir_dereference_variable(this->get_packed_varying(location,
                                                          unpacked_var, name));
      ir_swizzle *swizzle = new(this->mem_ctx)
         ir_swizzle(packed_deref, swizzle_values, components);
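      /* Outputs are packed at the end of main(), after the unpacked global
       * has received its final value; inputs are unpacked at the start of
       * main(), before anything reads the unpacked global. */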
      if (this->mode == ir_var_shader_out) {
         ir_assignment *assignment
            = this->bitwise_assign_pack(swizzle, rvalue);
         this->main_instructions->push_tail(assignment);
      } else {
         ir_assignment *assignment
            = this->bitwise_assign_unpack(rvalue, swizzle);
         this->main_instructions->push_head(assignment);
      }
      return fine_location + components;
   }
}

/**
 * Recursively pack or unpack a varying for which we need to iterate over its
 * constituent elements, accessing each one using an ir_dereference_array.
 * This takes care of both arrays and matrices, since ir_dereference_array
 * treats a matrix like an array of its column vectors.
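 *
 * For example, the mat3x2 from the header comment is traversed as three vec2
 * columns, foo[0] through foo[2].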
 */
unsigned
lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue,
                                               unsigned array_size,
                                               unsigned fine_location,
                                               ir_variable *unpacked_var,
                                               const char *name)
{
   for (unsigned i = 0; i < array_size; i++) {
      if (i != 0)
         rvalue = rvalue->clone(this->mem_ctx, NULL);
      ir_constant *constant = new(this->mem_ctx) ir_constant(i);
      ir_dereference_array *dereference_array = new(this->mem_ctx)
         ir_dereference_array(rvalue, constant);
      char *subscripted_name
         = ralloc_asprintf(this->mem_ctx, "%s[%d]", name, i);
      fine_location = this->lower_rvalue(dereference_array, fine_location,
                                         unpacked_var, subscripted_name);
   }
   return fine_location;
}

/**
 * Retrieve the packed varying corresponding to the given varying location.
 * If no packed varying has been created for the given varying location yet,
 * create it and add it to the shader before returning it.
 *
 * The newly created varying inherits its interpolation parameters from \c
 * unpacked_var.  Its base type is ivec4 if we are lowering a flat varying,
 * vec4 otherwise.
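 *
 * The packed varying's name records everything stored in it: the first
 * varying packed into a slot produces e.g. "packed:foo", and each varying
 * that later shares the slot is appended, giving e.g. "packed:foo,bar".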
 */
ir_variable *
lower_packed_varyings_visitor::get_packed_varying(unsigned location,
                                                  ir_variable *unpacked_var,
                                                  const char *name)
{
   unsigned slot = location - this->location_base;
   assert(slot < locations_used);
   if (this->packed_varyings[slot] == NULL) {
      char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
      const glsl_type *packed_type;
      if (unpacked_var->interpolation == INTERP_QUALIFIER_FLAT)
         packed_type = glsl_type::ivec4_type;
      else
         packed_type = glsl_type::vec4_type;
      ir_variable *packed_var = new(this->mem_ctx)
         ir_variable(packed_type, packed_name, this->mode);
      packed_var->centroid = unpacked_var->centroid;
      packed_var->interpolation = unpacked_var->interpolation;
      packed_var->location = location;
      unpacked_var->insert_before(packed_var);
      this->packed_varyings[slot] = packed_var;
   } else {
      ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name,
                             ",%s", name);
   }
   return this->packed_varyings[slot];
}

bool
lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
{
   /* Things composed of vec4's don't need lowering.  Everything else does. */
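   /* E.g. vec4, ivec4, mat4 (whose columns are vec4s) and vec4[n] all keep
    * their slots as-is, while vec3, mat3x2, and struct varyings (for which
    * vector_elements is 0) get packed. */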
   const glsl_type *type = var->type;
   if (type->is_array())
      type = type->fields.array;
   if (type->vector_elements == 4)
      return false;
   return true;
}

void
lower_packed_varyings(void *mem_ctx, unsigned location_base,
                      unsigned locations_used, ir_variable_mode mode,
                      gl_shader *shader)
{
   exec_list *instructions = shader->ir;
   ir_function *main_func = shader->symbols->get_function("main");
   exec_list void_parameters;
   ir_function_signature *main_func_sig
      = main_func->matching_signature(&void_parameters);
   exec_list *main_instructions = &main_func_sig->body;
   lower_packed_varyings_visitor visitor(mem_ctx, location_base,
                                         locations_used, mode,
                                         main_instructions);
   visitor.run(instructions);
}
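
/* A hypothetical invocation from a linker (the real call sites live in
 * Mesa's linker code; the exact arguments here are illustrative
 * assumptions): pack all generic outputs of a producer stage, with generic
 * varyings starting at VARYING_SLOT_VAR0:
 *
 *    lower_packed_varyings(mem_ctx, VARYING_SLOT_VAR0, MAX_VARYING,
 *                          ir_var_shader_out, producer_shader);
 */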