Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Jason Ekstrand (jason@jlekstrand.net)
  25.  *
  26.  */
  27.  
  28. #include "nir.h"
  29.  
  30. /*
  31.  * Implements a simple pass that lowers vecN instructions to a series of
  32.  * moves with partial writes.
  33.  */
  34.  
  35. static bool
  36. src_matches_dest_reg(nir_dest *dest, nir_src *src)
  37. {
  38.    if (dest->is_ssa || src->is_ssa)
  39.       return false;
  40.  
  41.    return (dest->reg.reg == src->reg.reg &&
  42.            dest->reg.base_offset == src->reg.base_offset &&
  43.            !dest->reg.indirect &&
  44.            !src->reg.indirect);
  45. }
  46.  
  47. /**
  48.  * For a given starting writemask channel and corresponding source index in
  49.  * the vec instruction, insert a MOV to the vec instruction's dest of all the
  50.  * writemask channels that get read from the same src reg.
  51.  *
  52.  * Returns the writemask of our MOV, so the parent loop calling this knows
  53.  * which ones have been processed.
  54.  */
  55. static unsigned
  56. insert_mov(nir_alu_instr *vec, unsigned start_channel,
  57.             unsigned start_src_idx, void *mem_ctx)
  58. {
  59.    unsigned src_idx = start_src_idx;
  60.    assert(src_idx < nir_op_infos[vec->op].num_inputs);
  61.  
  62.    nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
  63.    nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
  64.    nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
  65.  
  66.    mov->dest.write_mask = (1u << start_channel);
  67.    mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
  68.    src_idx++;
  69.  
  70.    for (unsigned i = start_channel + 1; i < 4; i++) {
  71.       if (!(vec->dest.write_mask & (1 << i)))
  72.          continue;
  73.  
  74.       if (nir_srcs_equal(vec->src[src_idx].src, vec->src[start_src_idx].src)) {
  75.          mov->dest.write_mask |= (1 << i);
  76.          mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0];
  77.       }
  78.       src_idx++;
  79.    }
  80.  
  81.    nir_instr_insert_before(&vec->instr, &mov->instr);
  82.  
  83.    return mov->dest.write_mask;
  84. }
  85.  
  86. static bool
  87. lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
  88. {
  89.    nir_foreach_instr_safe(block, instr) {
  90.       if (instr->type != nir_instr_type_alu)
  91.          continue;
  92.  
  93.       nir_alu_instr *vec = (nir_alu_instr *)instr;
  94.  
  95.       switch (vec->op) {
  96.       case nir_op_vec2:
  97.       case nir_op_vec3:
  98.       case nir_op_vec4:
  99.          break;
  100.       default:
  101.          continue; /* The loop */
  102.       }
  103.  
  104.       /* Since we insert multiple MOVs, we have to be non-SSA. */
  105.       assert(!vec->dest.dest.is_ssa);
  106.  
  107.       unsigned finished_write_mask = 0;
  108.  
  109.       /* First, emit a MOV for all the src channels that are in the
  110.        * destination reg, in case other values we're populating in the dest
  111.        * might overwrite them.
  112.        */
  113.       for (unsigned i = 0, src_idx = 0; i < 4; i++) {
  114.          if (!(vec->dest.write_mask & (1 << i)))
  115.             continue;
  116.  
  117.          if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
  118.             finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
  119.             break;
  120.          }
  121.          src_idx++;
  122.       }
  123.  
  124.       /* Now, emit MOVs for all the other src channels. */
  125.       for (unsigned i = 0, src_idx = 0; i < 4; i++) {
  126.          if (!(vec->dest.write_mask & (1 << i)))
  127.             continue;
  128.  
  129.          if (!(finished_write_mask & (1 << i)))
  130.             finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
  131.  
  132.          src_idx++;
  133.       }
  134.  
  135.       nir_instr_remove(&vec->instr);
  136.       ralloc_free(vec);
  137.    }
  138.  
  139.    return true;
  140. }
  141.  
  142. static void
  143. nir_lower_vec_to_movs_impl(nir_function_impl *impl)
  144. {
  145.    nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl));
  146. }
  147.  
  148. void
  149. nir_lower_vec_to_movs(nir_shader *shader)
  150. {
  151.    nir_foreach_overload(shader, overload) {
  152.       if (overload->impl)
  153.          nir_lower_vec_to_movs_impl(overload->impl);
  154.    }
  155. }
  156.