Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
  2.  
  3. /*
  4.  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23.  * SOFTWARE.
  24.  *
  25.  * Authors:
  26.  *    Rob Clark <robclark@freedesktop.org>
  27.  */
  28.  
  29. #include "pipe/p_state.h"
  30. #include "util/u_string.h"
  31. #include "util/u_memory.h"
  32. #include "util/u_inlines.h"
  33. #include "util/u_format.h"
  34. #include "tgsi/tgsi_dump.h"
  35. #include "tgsi/tgsi_parse.h"
  36.  
  37. #include "freedreno_context.h"
  38. #include "freedreno_util.h"
  39.  
  40. #include "ir3_shader.h"
  41. #include "ir3_compiler.h"
  42.  
  43.  
  44. static void
  45. delete_variant(struct ir3_shader_variant *v)
  46. {
  47.         if (v->ir)
  48.                 ir3_destroy(v->ir);
  49.         fd_bo_del(v->bo);
  50.         free(v);
  51. }
  52.  
  53. /* for vertex shader, the inputs are loaded into registers before the shader
  54.  * is executed, so max_regs from the shader instructions might not properly
  55.  * reflect the # of registers actually used, especially in case passthrough
  56.  * varyings.
  57.  *
  58.  * Likewise, for fragment shader, we can have some regs which are passed
  59.  * input values but never touched by the resulting shader (ie. as result
  60.  * of dead code elimination or simply because we don't know how to turn
  61.  * the reg off.
  62.  */
  63. static void
  64. fixup_regfootprint(struct ir3_shader_variant *v)
  65. {
  66.         if (v->type == SHADER_VERTEX) {
  67.                 unsigned i;
  68.                 for (i = 0; i < v->inputs_count; i++) {
  69.                         /* skip frag inputs fetch via bary.f since their reg's are
  70.                          * not written by gpu before shader starts (and in fact the
  71.                          * regid's might not even be valid)
  72.                          */
  73.                         if (v->inputs[i].bary)
  74.                                 continue;
  75.  
  76.                         if (v->inputs[i].compmask) {
  77.                                 int32_t regid = (v->inputs[i].regid + 3) >> 2;
  78.                                 v->info.max_reg = MAX2(v->info.max_reg, regid);
  79.                         }
  80.                 }
  81.                 for (i = 0; i < v->outputs_count; i++) {
  82.                         int32_t regid = (v->outputs[i].regid + 3) >> 2;
  83.                         v->info.max_reg = MAX2(v->info.max_reg, regid);
  84.                 }
  85.         } else if (v->type == SHADER_FRAGMENT) {
  86.                 /* NOTE: not sure how to turn pos_regid off..  but this could
  87.                  * be, for example, r1.x while max reg used by the shader is
  88.                  * r0.*, in which case we need to fixup the reg footprint:
  89.                  */
  90.                 v->info.max_reg = MAX2(v->info.max_reg, v->pos_regid >> 2);
  91.                 if (v->frag_coord)
  92.                         debug_assert(v->info.max_reg >= 0); /* hard coded r0.x */
  93.                 if (v->frag_face)
  94.                         debug_assert(v->info.max_half_reg >= 0); /* hr0.x */
  95.         }
  96. }
  97.  
  98. /* wrapper for ir3_assemble() which does some info fixup based on
  99.  * shader state.  Non-static since used by ir3_cmdline too.
  100.  */
  101. void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id)
  102. {
  103.         void *bin;
  104.  
  105.         bin = ir3_assemble(v->ir, &v->info, gpu_id);
  106.         if (!bin)
  107.                 return NULL;
  108.  
  109.         if (gpu_id >= 400) {
  110.                 v->instrlen = v->info.sizedwords / (2 * 16);
  111.         } else {
  112.                 v->instrlen = v->info.sizedwords / (2 * 4);
  113.         }
  114.  
  115.         /* NOTE: if relative addressing is used, we set constlen in
  116.          * the compiler (to worst-case value) since we don't know in
  117.          * the assembler what the max addr reg value can be:
  118.          */
  119.         v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1));
  120.  
  121.         fixup_regfootprint(v);
  122.  
  123.         return bin;
  124. }
  125.  
  126. static void
  127. assemble_variant(struct ir3_shader_variant *v)
  128. {
  129.         struct fd_context *ctx = fd_context(v->shader->pctx);
  130.         uint32_t gpu_id = ir3_shader_gpuid(v->shader);
  131.         uint32_t sz, *bin;
  132.  
  133.         bin = ir3_shader_assemble(v, gpu_id);
  134.         sz = v->info.sizedwords * 4;
  135.  
  136.         v->bo = fd_bo_new(ctx->dev, sz,
  137.                         DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
  138.                         DRM_FREEDRENO_GEM_TYPE_KMEM);
  139.  
  140.         memcpy(fd_bo_map(v->bo), bin, sz);
  141.  
  142.         free(bin);
  143.  
  144.         /* no need to keep the ir around beyond this point: */
  145.         ir3_destroy(v->ir);
  146.         v->ir = NULL;
  147. }
  148.  
  149. /* reset before attempting to compile again.. */
  150. static void reset_variant(struct ir3_shader_variant *v, const char *msg)
  151. {
  152.         debug_error(msg);
  153.         v->inputs_count = 0;
  154.         v->outputs_count = 0;
  155.         v->total_in = 0;
  156.         v->has_samp = false;
  157.         v->immediates_count = 0;
  158. }
  159.  
  160. static struct ir3_shader_variant *
  161. create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
  162. {
  163.         struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
  164.         const struct tgsi_token *tokens = shader->tokens;
  165.         int ret;
  166.  
  167.         if (!v)
  168.                 return NULL;
  169.  
  170.         v->shader = shader;
  171.         v->key = key;
  172.         v->type = shader->type;
  173.  
  174.         if (fd_mesa_debug & FD_DBG_DISASM) {
  175.                 DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
  176.                         key.binning_pass, key.color_two_side, key.half_precision);
  177.                 tgsi_dump(tokens, 0);
  178.         }
  179.  
  180.         if (fd_mesa_debug & FD_DBG_NIR) {
  181.                 ret = ir3_compile_shader_nir(v, tokens, key);
  182.                 if (ret)
  183.                         reset_variant(v, "NIR compiler failed, fallback to TGSI!");
  184.         } else {
  185.                 ret = -1;
  186.         }
  187.  
  188.         if (ret) {
  189.                 ret = ir3_compile_shader(v, tokens, key, true);
  190.                 if (ret) {
  191.                         reset_variant(v, "new compiler failed, trying without copy propagation!");
  192.                         ret = ir3_compile_shader(v, tokens, key, false);
  193.                 }
  194.         }
  195.  
  196.         if (ret) {
  197.                 debug_error("compile failed!");
  198.                 goto fail;
  199.         }
  200.  
  201.         assemble_variant(v);
  202.         if (!v->bo) {
  203.                 debug_error("assemble failed!");
  204.                 goto fail;
  205.         }
  206.  
  207.         if (fd_mesa_debug & FD_DBG_DISASM) {
  208.                 DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
  209.                         key.binning_pass, key.color_two_side, key.half_precision);
  210.                 disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
  211.         }
  212.  
  213.         return v;
  214.  
  215. fail:
  216.         delete_variant(v);
  217.         return NULL;
  218. }
  219.  
  220. uint32_t
  221. ir3_shader_gpuid(struct ir3_shader *shader)
  222. {
  223.         struct fd_context *ctx = fd_context(shader->pctx);
  224.         return ctx->screen->gpu_id;
  225. }
  226.  
  227. struct ir3_shader_variant *
  228. ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
  229. {
  230.         struct ir3_shader_variant *v;
  231.  
  232.         /* some shader key values only apply to vertex or frag shader,
  233.          * so normalize the key to avoid constructing multiple identical
  234.          * variants:
  235.          */
  236.         switch (shader->type) {
  237.         case SHADER_FRAGMENT:
  238.         case SHADER_COMPUTE:
  239.                 key.binning_pass = false;
  240.                 if (key.has_per_samp) {
  241.                         key.vsaturate_s = 0;
  242.                         key.vsaturate_t = 0;
  243.                         key.vsaturate_r = 0;
  244.                 }
  245.                 break;
  246.         case SHADER_VERTEX:
  247.                 key.color_two_side = false;
  248.                 key.half_precision = false;
  249.                 key.rasterflat = false;
  250.                 if (key.has_per_samp) {
  251.                         key.fsaturate_s = 0;
  252.                         key.fsaturate_t = 0;
  253.                         key.fsaturate_r = 0;
  254.                 }
  255.                 break;
  256.         }
  257.  
  258.         for (v = shader->variants; v; v = v->next)
  259.                 if (ir3_shader_key_equal(&key, &v->key))
  260.                         return v;
  261.  
  262.         /* compile new variant if it doesn't exist already: */
  263.         v = create_variant(shader, key);
  264.         v->next = shader->variants;
  265.         shader->variants = v;
  266.  
  267.         return v;
  268. }
  269.  
  270.  
  271. void
  272. ir3_shader_destroy(struct ir3_shader *shader)
  273. {
  274.         struct ir3_shader_variant *v, *t;
  275.         for (v = shader->variants; v; ) {
  276.                 t = v;
  277.                 v = v->next;
  278.                 delete_variant(t);
  279.         }
  280.         free((void *)shader->tokens);
  281.         free(shader);
  282. }
  283.  
  284. struct ir3_shader *
  285. ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens,
  286.                 enum shader_t type)
  287. {
  288.         struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
  289.         shader->pctx = pctx;
  290.         shader->type = type;
  291.         shader->tokens = tgsi_dup_tokens(tokens);
  292.         return shader;
  293. }
  294.