Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2015 Red Hat
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Rob Clark <robclark@freedesktop.org>
  25.  */
  26.  
  27. #include "nir.h"
  28. #include "nir_builder.h"
  29.  
  30. /* Lowers idiv/udiv/umod
  31.  * Based on NV50LegalizeSSA::handleDIV()
  32.  *
  33.  * Note that this is probably not enough precision for compute shaders.
  34.  * Perhaps we want a second higher precision (looping) version of this?
  35.  * Or perhaps we assume if you can do compute shaders you can also
  36.  * branch out to a pre-optimized shader library routine..
  37.  */
  38.  
  39. static void
  40. convert_instr(nir_builder *bld, nir_alu_instr *alu)
  41. {
  42.    nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
  43.    nir_op op = alu->op;
  44.    bool is_signed;
  45.  
  46.    if ((op != nir_op_idiv) &&
  47.        (op != nir_op_udiv) &&
  48.        (op != nir_op_umod))
  49.       return;
  50.  
  51.    is_signed = (op == nir_op_idiv);
  52.  
  53.    nir_builder_insert_before_instr(bld, &alu->instr);
  54.  
  55.    numer = nir_ssa_for_src(bld, alu->src[0].src,
  56.                            nir_ssa_alu_instr_src_components(alu, 0));
  57.    denom = nir_ssa_for_src(bld, alu->src[1].src,
  58.                            nir_ssa_alu_instr_src_components(alu, 1));
  59.  
  60.    if (is_signed) {
  61.       af = nir_i2f(bld, numer);
  62.       bf = nir_i2f(bld, denom);
  63.       af = nir_fabs(bld, af);
  64.       bf = nir_fabs(bld, bf);
  65.       a  = nir_iabs(bld, numer);
  66.       b  = nir_iabs(bld, denom);
  67.    } else {
  68.       af = nir_u2f(bld, numer);
  69.       bf = nir_u2f(bld, denom);
  70.       a  = numer;
  71.       b  = denom;
  72.    }
  73.  
  74.    /* get first result: */
  75.    bf = nir_frcp(bld, bf);
  76.    bf = nir_isub(bld, bf, nir_imm_int(bld, 2));  /* yes, really */
  77.    q  = nir_fmul(bld, af, bf);
  78.  
  79.    if (is_signed) {
  80.       q = nir_f2i(bld, q);
  81.    } else {
  82.       q = nir_f2u(bld, q);
  83.    }
  84.  
  85.    /* get error of first result: */
  86.    r = nir_imul(bld, q, b);
  87.    r = nir_isub(bld, a, r);
  88.    r = nir_u2f(bld, r);
  89.    r = nir_fmul(bld, r, bf);
  90.    r = nir_f2u(bld, r);
  91.  
  92.    /* add quotients: */
  93.    q = nir_iadd(bld, q, r);
  94.  
  95.    /* correction: if modulus >= divisor, add 1 */
  96.    r = nir_imul(bld, q, b);
  97.    r = nir_isub(bld, a, r);
  98.  
  99.    r = nir_ige(bld, r, b);
  100.    r = nir_b2i(bld, r);
  101.  
  102.    q = nir_iadd(bld, q, r);
  103.    if (is_signed)  {
  104.       /* fix the sign: */
  105.       r = nir_ixor(bld, numer, denom);
  106.       r = nir_ushr(bld, r, nir_imm_int(bld, 31));
  107.       r = nir_i2b(bld, r);
  108.       b = nir_ineg(bld, q);
  109.       q = nir_bcsel(bld, r, b, q);
  110.    }
  111.  
  112.    if (op == nir_op_umod) {
  113.       /* division result in q */
  114.       r = nir_imul(bld, q, b);
  115.       q = nir_isub(bld, a, r);
  116.    }
  117.  
  118.    assert(alu->dest.dest.is_ssa);
  119.    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
  120.                             nir_src_for_ssa(q),
  121.                             ralloc_parent(alu));
  122. }
  123.  
  124. static bool
  125. convert_block(nir_block *block, void *state)
  126. {
  127.    nir_builder *b = state;
  128.  
  129.    nir_foreach_instr_safe(block, instr) {
  130.       if (instr->type == nir_instr_type_alu)
  131.          convert_instr(b, nir_instr_as_alu(instr));
  132.    }
  133.  
  134.    return true;
  135. }
  136.  
  137. static void
  138. convert_impl(nir_function_impl *impl)
  139. {
  140.    nir_builder b;
  141.    nir_builder_init(&b, impl);
  142.  
  143.    nir_foreach_block(impl, convert_block, &b);
  144.    nir_metadata_preserve(impl, nir_metadata_block_index |
  145.                                nir_metadata_dominance);
  146. }
  147.  
  148. void
  149. nir_lower_idiv(nir_shader *shader)
  150. {
  151.    nir_foreach_overload(shader, overload) {
  152.       if (overload->impl)
  153.          convert_impl(overload->impl);
  154.    }
  155. }
  156.