Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2012-2013 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "genhw/genhw.h" /* for SBE setup */
  29. #include "core/ilo_builder.h"
  30. #include "core/ilo_state_3d.h"
  31. #include "core/intel_winsys.h"
  32. #include "shader/ilo_shader_internal.h"
  33. #include "tgsi/tgsi_parse.h"
  34.  
  35. #include "ilo_state.h"
  36. #include "ilo_shader.h"
  37.  
  /**
   * Book-keeping for the shader states managed by a shader cache.
   */
  struct ilo_shader_cache {
     /* shader states that went through the last ilo_shader_cache_upload() */
     struct list_head shaders;
     /* shader states added, changed, or invalidated since the last upload */
     struct list_head changed;
  };
  42.  
  43. /**
  44.  * Create a shader cache.  A shader cache can manage shaders and upload them
  45.  * to a bo as a whole.
  46.  */
  47. struct ilo_shader_cache *
  48. ilo_shader_cache_create(void)
  49. {
  50.    struct ilo_shader_cache *shc;
  51.  
  52.    shc = CALLOC_STRUCT(ilo_shader_cache);
  53.    if (!shc)
  54.       return NULL;
  55.  
  56.    list_inithead(&shc->shaders);
  57.    list_inithead(&shc->changed);
  58.  
  59.    return shc;
  60. }
  61.  
  /**
   * Free a shader cache.  Only the cache object itself is released; shader
   * states that are still on its lists are not touched.
   */
  void
  ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
  {
     FREE(shc);
  }
  70.  
  71. /**
  72.  * Add a shader to the cache.
  73.  */
  74. void
  75. ilo_shader_cache_add(struct ilo_shader_cache *shc,
  76.                      struct ilo_shader_state *shader)
  77. {
  78.    struct ilo_shader *sh;
  79.  
  80.    shader->cache = shc;
  81.    LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
  82.       sh->uploaded = false;
  83.  
  84.    list_add(&shader->list, &shc->changed);
  85. }
  86.  
  87. /**
  88.  * Remove a shader from the cache.
  89.  */
  90. void
  91. ilo_shader_cache_remove(struct ilo_shader_cache *shc,
  92.                         struct ilo_shader_state *shader)
  93. {
  94.    list_del(&shader->list);
  95.    shader->cache = NULL;
  96. }
  97.  
  98. /**
  99.  * Notify the cache that a managed shader has changed.
  100.  */
  101. static void
  102. ilo_shader_cache_notify_change(struct ilo_shader_cache *shc,
  103.                                struct ilo_shader_state *shader)
  104. {
  105.    if (shader->cache == shc) {
  106.       list_del(&shader->list);
  107.       list_add(&shader->list, &shc->changed);
  108.    }
  109. }
  110.  
  111. /**
  112.  * Upload managed shaders to the bo.  Only shaders that are changed or added
  113.  * after the last upload are uploaded.
  114.  */
  115. void
  116. ilo_shader_cache_upload(struct ilo_shader_cache *shc,
  117.                         struct ilo_builder *builder)
  118. {
  119.    struct ilo_shader_state *shader, *next;
  120.  
  121.    LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->changed, list) {
  122.       struct ilo_shader *sh;
  123.  
  124.       LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
  125.          if (sh->uploaded)
  126.             continue;
  127.  
  128.          sh->cache_offset = ilo_builder_instruction_write(builder,
  129.                sh->kernel_size, sh->kernel);
  130.  
  131.          sh->uploaded = true;
  132.       }
  133.  
  134.       list_del(&shader->list);
  135.       list_add(&shader->list, &shc->shaders);
  136.    }
  137. }
  138.  
  139. /**
  140.  * Invalidate all shaders so that they get uploaded in next
  141.  * ilo_shader_cache_upload().
  142.  */
  143. void
  144. ilo_shader_cache_invalidate(struct ilo_shader_cache *shc)
  145. {
  146.    struct ilo_shader_state *shader, *next;
  147.  
  148.    LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->shaders, list) {
  149.       list_del(&shader->list);
  150.       list_add(&shader->list, &shc->changed);
  151.    }
  152.  
  153.    LIST_FOR_EACH_ENTRY(shader, &shc->changed, list) {
  154.       struct ilo_shader *sh;
  155.  
  156.       LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
  157.          sh->uploaded = false;
  158.    }
  159. }
  160.  
  161. /**
  162.  * Initialize a shader variant.
  163.  */
  164. void
  165. ilo_shader_variant_init(struct ilo_shader_variant *variant,
  166.                         const struct ilo_shader_info *info,
  167.                         const struct ilo_state_vector *vec)
  168. {
  169.    int num_views, i;
  170.  
  171.    memset(variant, 0, sizeof(*variant));
  172.  
  173.    switch (info->type) {
  174.    case PIPE_SHADER_VERTEX:
  175.       variant->u.vs.rasterizer_discard =
  176.          vec->rasterizer->state.rasterizer_discard;
  177.       variant->u.vs.num_ucps =
  178.          util_last_bit(vec->rasterizer->state.clip_plane_enable);
  179.       break;
  180.    case PIPE_SHADER_GEOMETRY:
  181.       variant->u.gs.rasterizer_discard =
  182.          vec->rasterizer->state.rasterizer_discard;
  183.       variant->u.gs.num_inputs = vec->vs->shader->out.count;
  184.       for (i = 0; i < vec->vs->shader->out.count; i++) {
  185.          variant->u.gs.semantic_names[i] =
  186.             vec->vs->shader->out.semantic_names[i];
  187.          variant->u.gs.semantic_indices[i] =
  188.             vec->vs->shader->out.semantic_indices[i];
  189.       }
  190.       break;
  191.    case PIPE_SHADER_FRAGMENT:
  192.       variant->u.fs.flatshade =
  193.          (info->has_color_interp && vec->rasterizer->state.flatshade);
  194.       variant->u.fs.fb_height = (info->has_pos) ?
  195.          vec->fb.state.height : 1;
  196.       variant->u.fs.num_cbufs = vec->fb.state.nr_cbufs;
  197.       break;
  198.    default:
  199.       assert(!"unknown shader type");
  200.       break;
  201.    }
  202.  
  203.    /* use PCB unless constant buffer 0 is not in user buffer  */
  204.    if ((vec->cbuf[info->type].enabled_mask & 0x1) &&
  205.        !vec->cbuf[info->type].cso[0].user_buffer)
  206.       variant->use_pcb = false;
  207.    else
  208.       variant->use_pcb = true;
  209.  
  210.    num_views = vec->view[info->type].count;
  211.    assert(info->num_samplers <= num_views);
  212.  
  213.    variant->num_sampler_views = info->num_samplers;
  214.    for (i = 0; i < info->num_samplers; i++) {
  215.       const struct pipe_sampler_view *view = vec->view[info->type].states[i];
  216.       const struct ilo_sampler_cso *sampler = vec->sampler[info->type].cso[i];
  217.  
  218.       if (view) {
  219.          variant->sampler_view_swizzles[i].r = view->swizzle_r;
  220.          variant->sampler_view_swizzles[i].g = view->swizzle_g;
  221.          variant->sampler_view_swizzles[i].b = view->swizzle_b;
  222.          variant->sampler_view_swizzles[i].a = view->swizzle_a;
  223.       }
  224.       else if (info->shadow_samplers & (1 << i)) {
  225.          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
  226.          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
  227.          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
  228.          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
  229.       }
  230.       else {
  231.          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
  232.          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
  233.          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
  234.          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
  235.       }
  236.  
  237.       /*
  238.        * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
  239.        * the HW wrap mode is set to GEN6_TEXCOORDMODE_CLAMP_BORDER, and we
  240.        * need to manually saturate the texture coordinates.
  241.        */
  242.       if (sampler) {
  243.          variant->saturate_tex_coords[0] |= sampler->saturate_s << i;
  244.          variant->saturate_tex_coords[1] |= sampler->saturate_t << i;
  245.          variant->saturate_tex_coords[2] |= sampler->saturate_r << i;
  246.       }
  247.    }
  248. }
  249.  
  250. /**
  251.  * Guess the shader variant, knowing that the context may still change.
  252.  */
  253. static void
  254. ilo_shader_variant_guess(struct ilo_shader_variant *variant,
  255.                          const struct ilo_shader_info *info,
  256.                          const struct ilo_state_vector *vec)
  257. {
  258.    int i;
  259.  
  260.    memset(variant, 0, sizeof(*variant));
  261.  
  262.    switch (info->type) {
  263.    case PIPE_SHADER_VERTEX:
  264.       break;
  265.    case PIPE_SHADER_GEOMETRY:
  266.       break;
  267.    case PIPE_SHADER_FRAGMENT:
  268.       variant->u.fs.flatshade = false;
  269.       variant->u.fs.fb_height = (info->has_pos) ?
  270.          vec->fb.state.height : 1;
  271.       variant->u.fs.num_cbufs = 1;
  272.       break;
  273.    default:
  274.       assert(!"unknown shader type");
  275.       break;
  276.    }
  277.  
  278.    variant->use_pcb = true;
  279.  
  280.    variant->num_sampler_views = info->num_samplers;
  281.    for (i = 0; i < info->num_samplers; i++) {
  282.       if (info->shadow_samplers & (1 << i)) {
  283.          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
  284.          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
  285.          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
  286.          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
  287.       }
  288.       else {
  289.          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
  290.          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
  291.          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
  292.          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
  293.       }
  294.    }
  295. }
  296.  
  297.  
  298. /**
  299.  * Parse a TGSI instruction for the shader info.
  300.  */
  301. static void
  302. ilo_shader_info_parse_inst(struct ilo_shader_info *info,
  303.                            const struct tgsi_full_instruction *inst)
  304. {
  305.    int i;
  306.  
  307.    /* look for edgeflag passthrough */
  308.    if (info->edgeflag_out >= 0 &&
  309.        inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
  310.        inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
  311.        inst->Dst[0].Register.Index == info->edgeflag_out) {
  312.  
  313.       assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
  314.       info->edgeflag_in = inst->Src[0].Register.Index;
  315.    }
  316.  
  317.    if (inst->Instruction.Texture) {
  318.       bool shadow;
  319.  
  320.       switch (inst->Texture.Texture) {
  321.       case TGSI_TEXTURE_SHADOW1D:
  322.       case TGSI_TEXTURE_SHADOW2D:
  323.       case TGSI_TEXTURE_SHADOWRECT:
  324.       case TGSI_TEXTURE_SHADOW1D_ARRAY:
  325.       case TGSI_TEXTURE_SHADOW2D_ARRAY:
  326.       case TGSI_TEXTURE_SHADOWCUBE:
  327.       case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
  328.          shadow = true;
  329.          break;
  330.       default:
  331.          shadow = false;
  332.          break;
  333.       }
  334.  
  335.       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
  336.          const struct tgsi_full_src_register *src = &inst->Src[i];
  337.  
  338.          if (src->Register.File == TGSI_FILE_SAMPLER) {
  339.             const int idx = src->Register.Index;
  340.  
  341.             if (idx >= info->num_samplers)
  342.                info->num_samplers = idx + 1;
  343.  
  344.             if (shadow)
  345.                info->shadow_samplers |= 1 << idx;
  346.          }
  347.       }
  348.    }
  349. }
  350.  
  351. /**
  352.  * Parse a TGSI property for the shader info.
  353.  */
  354. static void
  355. ilo_shader_info_parse_prop(struct ilo_shader_info *info,
  356.                            const struct tgsi_full_property *prop)
  357. {
  358.    switch (prop->Property.PropertyName) {
  359.    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
  360.       info->fs_color0_writes_all_cbufs = prop->u[0].Data;
  361.       break;
  362.    default:
  363.       break;
  364.    }
  365. }
  366.  
  367. /**
  368.  * Parse a TGSI declaration for the shader info.
  369.  */
  370. static void
  371. ilo_shader_info_parse_decl(struct ilo_shader_info *info,
  372.                            const struct tgsi_full_declaration *decl)
  373. {
  374.    switch (decl->Declaration.File) {
  375.    case TGSI_FILE_INPUT:
  376.       if (decl->Declaration.Interpolate &&
  377.           decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR)
  378.          info->has_color_interp = true;
  379.       if (decl->Declaration.Semantic &&
  380.           decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
  381.          info->has_pos = true;
  382.       break;
  383.    case TGSI_FILE_OUTPUT:
  384.       if (decl->Declaration.Semantic &&
  385.           decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG)
  386.          info->edgeflag_out = decl->Range.First;
  387.       break;
  388.    case TGSI_FILE_CONSTANT:
  389.       {
  390.          const int idx = (decl->Declaration.Dimension) ?
  391.             decl->Dim.Index2D : 0;
  392.          if (info->constant_buffer_count <= idx)
  393.             info->constant_buffer_count = idx + 1;
  394.       }
  395.       break;
  396.    case TGSI_FILE_SYSTEM_VALUE:
  397.       if (decl->Declaration.Semantic &&
  398.           decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID)
  399.          info->has_instanceid = true;
  400.       if (decl->Declaration.Semantic &&
  401.           decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
  402.          info->has_vertexid = true;
  403.       break;
  404.    default:
  405.       break;
  406.    }
  407. }
  408.  
  409. static void
  410. ilo_shader_info_parse_tokens(struct ilo_shader_info *info)
  411. {
  412.    struct tgsi_parse_context parse;
  413.  
  414.    info->edgeflag_in = -1;
  415.    info->edgeflag_out = -1;
  416.  
  417.    tgsi_parse_init(&parse, info->tokens);
  418.    while (!tgsi_parse_end_of_tokens(&parse)) {
  419.       const union tgsi_full_token *token;
  420.  
  421.       tgsi_parse_token(&parse);
  422.       token = &parse.FullToken;
  423.  
  424.       switch (token->Token.Type) {
  425.       case TGSI_TOKEN_TYPE_DECLARATION:
  426.          ilo_shader_info_parse_decl(info, &token->FullDeclaration);
  427.          break;
  428.       case TGSI_TOKEN_TYPE_INSTRUCTION:
  429.          ilo_shader_info_parse_inst(info, &token->FullInstruction);
  430.          break;
  431.       case TGSI_TOKEN_TYPE_PROPERTY:
  432.          ilo_shader_info_parse_prop(info, &token->FullProperty);
  433.          break;
  434.       default:
  435.          break;
  436.       }
  437.    }
  438.    tgsi_parse_free(&parse);
  439. }
  440.  
  441. /**
  442.  * Create a shader state.
  443.  */
  444. static struct ilo_shader_state *
  445. ilo_shader_state_create(const struct ilo_dev *dev,
  446.                         const struct ilo_state_vector *vec,
  447.                         int type, const void *templ)
  448. {
  449.    struct ilo_shader_state *state;
  450.    struct ilo_shader_variant variant;
  451.  
  452.    state = CALLOC_STRUCT(ilo_shader_state);
  453.    if (!state)
  454.       return NULL;
  455.  
  456.    state->info.dev = dev;
  457.    state->info.type = type;
  458.  
  459.    if (type == PIPE_SHADER_COMPUTE) {
  460.       const struct pipe_compute_state *c =
  461.          (const struct pipe_compute_state *) templ;
  462.  
  463.       state->info.tokens = tgsi_dup_tokens(c->prog);
  464.       state->info.compute.req_local_mem = c->req_local_mem;
  465.       state->info.compute.req_private_mem = c->req_private_mem;
  466.       state->info.compute.req_input_mem = c->req_input_mem;
  467.    }
  468.    else {
  469.       const struct pipe_shader_state *s =
  470.          (const struct pipe_shader_state *) templ;
  471.  
  472.       state->info.tokens = tgsi_dup_tokens(s->tokens);
  473.       state->info.stream_output = s->stream_output;
  474.    }
  475.  
  476.    list_inithead(&state->variants);
  477.  
  478.    ilo_shader_info_parse_tokens(&state->info);
  479.  
  480.    /* guess and compile now */
  481.    ilo_shader_variant_guess(&variant, &state->info, vec);
  482.    if (!ilo_shader_state_use_variant(state, &variant)) {
  483.       ilo_shader_destroy(state);
  484.       return NULL;
  485.    }
  486.  
  487.    return state;
  488. }
  489.  
  490. /**
  491.  * Add a compiled shader to the shader state.
  492.  */
  493. static void
  494. ilo_shader_state_add_shader(struct ilo_shader_state *state,
  495.                             struct ilo_shader *sh)
  496. {
  497.    list_add(&sh->list, &state->variants);
  498.    state->num_variants++;
  499.    state->total_size += sh->kernel_size;
  500.  
  501.    if (state->cache)
  502.       ilo_shader_cache_notify_change(state->cache, state);
  503. }
  504.  
  505. /**
  506.  * Remove a compiled shader from the shader state.
  507.  */
  508. static void
  509. ilo_shader_state_remove_shader(struct ilo_shader_state *state,
  510.                                struct ilo_shader *sh)
  511. {
  512.    list_del(&sh->list);
  513.    state->num_variants--;
  514.    state->total_size -= sh->kernel_size;
  515. }
  516.  
  517. /**
  518.  * Garbage collect shader variants in the shader state.
  519.  */
  520. static void
  521. ilo_shader_state_gc(struct ilo_shader_state *state)
  522. {
  523.    /* activate when the variants take up more than 4KiB of space */
  524.    const int limit = 4 * 1024;
  525.    struct ilo_shader *sh, *next;
  526.  
  527.    if (state->total_size < limit)
  528.       return;
  529.  
  530.    /* remove from the tail as the most recently ones are at the head */
  531.    LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) {
  532.       ilo_shader_state_remove_shader(state, sh);
  533.       ilo_shader_destroy_kernel(sh);
  534.  
  535.       if (state->total_size <= limit / 2)
  536.          break;
  537.    }
  538. }
  539.  
  540. /**
  541.  * Search for a shader variant.
  542.  */
  543. static struct ilo_shader *
  544. ilo_shader_state_search_variant(struct ilo_shader_state *state,
  545.                                 const struct ilo_shader_variant *variant)
  546. {
  547.    struct ilo_shader *sh = NULL, *tmp;
  548.  
  549.    LIST_FOR_EACH_ENTRY(tmp, &state->variants, list) {
  550.       if (memcmp(&tmp->variant, variant, sizeof(*variant)) == 0) {
  551.          sh = tmp;
  552.          break;
  553.       }
  554.    }
  555.  
  556.    return sh;
  557. }
  558.  
  559. static void
  560. copy_so_info(struct ilo_shader *sh,
  561.              const struct pipe_stream_output_info *so_info)
  562. {
  563.    unsigned i, attr;
  564.  
  565.    if (!so_info->num_outputs)
  566.       return;
  567.  
  568.    sh->so_info = *so_info;
  569.  
  570.    for (i = 0; i < so_info->num_outputs; i++) {
  571.       /* figure out which attribute is sourced */
  572.       for (attr = 0; attr < sh->out.count; attr++) {
  573.          const int reg_idx = sh->out.register_indices[attr];
  574.          if (reg_idx == so_info->output[i].register_index)
  575.             break;
  576.       }
  577.  
  578.       if (attr < sh->out.count) {
  579.          sh->so_info.output[i].register_index = attr;
  580.       }
  581.       else {
  582.          assert(!"stream output an undefined register");
  583.          sh->so_info.output[i].register_index = 0;
  584.       }
  585.  
  586.       /* PSIZE is at W channel */
  587.       if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
  588.          assert(so_info->output[i].start_component == 0);
  589.          assert(so_info->output[i].num_components == 1);
  590.          sh->so_info.output[i].start_component = 3;
  591.       }
  592.    }
  593. }
  594.  
  595. /**
  596.  * Add a shader variant to the shader state.
  597.  */
  598. static struct ilo_shader *
  599. ilo_shader_state_add_variant(struct ilo_shader_state *state,
  600.                              const struct ilo_shader_variant *variant)
  601. {
  602.    struct ilo_shader *sh;
  603.  
  604.    switch (state->info.type) {
  605.    case PIPE_SHADER_VERTEX:
  606.       sh = ilo_shader_compile_vs(state, variant);
  607.       break;
  608.    case PIPE_SHADER_FRAGMENT:
  609.       sh = ilo_shader_compile_fs(state, variant);
  610.       break;
  611.    case PIPE_SHADER_GEOMETRY:
  612.       sh = ilo_shader_compile_gs(state, variant);
  613.       break;
  614.    case PIPE_SHADER_COMPUTE:
  615.       sh = ilo_shader_compile_cs(state, variant);
  616.       break;
  617.    default:
  618.       sh = NULL;
  619.       break;
  620.    }
  621.    if (!sh) {
  622.       assert(!"failed to compile shader");
  623.       return NULL;
  624.    }
  625.  
  626.    sh->variant = *variant;
  627.  
  628.    copy_so_info(sh, &state->info.stream_output);
  629.  
  630.    ilo_shader_state_add_shader(state, sh);
  631.  
  632.    return sh;
  633. }
  634.  
  635. /**
  636.  * Update state->shader to point to a variant.  If the variant does not exist,
  637.  * it will be added first.
  638.  */
  639. bool
  640. ilo_shader_state_use_variant(struct ilo_shader_state *state,
  641.                              const struct ilo_shader_variant *variant)
  642. {
  643.    struct ilo_shader *sh;
  644.    bool construct_cso = false;
  645.  
  646.    sh = ilo_shader_state_search_variant(state, variant);
  647.    if (!sh) {
  648.       ilo_shader_state_gc(state);
  649.  
  650.       sh = ilo_shader_state_add_variant(state, variant);
  651.       if (!sh)
  652.          return false;
  653.  
  654.       construct_cso = true;
  655.    }
  656.  
  657.    /* move to head */
  658.    if (state->variants.next != &sh->list) {
  659.       list_del(&sh->list);
  660.       list_add(&sh->list, &state->variants);
  661.    }
  662.  
  663.    state->shader = sh;
  664.  
  665.    if (construct_cso) {
  666.       switch (state->info.type) {
  667.       case PIPE_SHADER_VERTEX:
  668.          ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
  669.          break;
  670.       case PIPE_SHADER_GEOMETRY:
  671.          ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
  672.          break;
  673.       case PIPE_SHADER_FRAGMENT:
  674.          ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso);
  675.          break;
  676.       default:
  677.          break;
  678.       }
  679.    }
  680.  
  681.    return true;
  682. }
  683.  
  684. struct ilo_shader_state *
  685. ilo_shader_create_vs(const struct ilo_dev *dev,
  686.                      const struct pipe_shader_state *state,
  687.                      const struct ilo_state_vector *precompile)
  688. {
  689.    struct ilo_shader_state *shader;
  690.  
  691.    shader = ilo_shader_state_create(dev, precompile,
  692.          PIPE_SHADER_VERTEX, state);
  693.  
  694.    /* states used in ilo_shader_variant_init() */
  695.    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS |
  696.                                         ILO_DIRTY_RASTERIZER |
  697.                                         ILO_DIRTY_CBUF;
  698.  
  699.    return shader;
  700. }
  701.  
  702. struct ilo_shader_state *
  703. ilo_shader_create_gs(const struct ilo_dev *dev,
  704.                      const struct pipe_shader_state *state,
  705.                      const struct ilo_state_vector *precompile)
  706. {
  707.    struct ilo_shader_state *shader;
  708.  
  709.    shader = ilo_shader_state_create(dev, precompile,
  710.          PIPE_SHADER_GEOMETRY, state);
  711.  
  712.    /* states used in ilo_shader_variant_init() */
  713.    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS |
  714.                                         ILO_DIRTY_VS |
  715.                                         ILO_DIRTY_RASTERIZER |
  716.                                         ILO_DIRTY_CBUF;
  717.  
  718.    return shader;
  719. }
  720.  
  721. struct ilo_shader_state *
  722. ilo_shader_create_fs(const struct ilo_dev *dev,
  723.                      const struct pipe_shader_state *state,
  724.                      const struct ilo_state_vector *precompile)
  725. {
  726.    struct ilo_shader_state *shader;
  727.  
  728.    shader = ilo_shader_state_create(dev, precompile,
  729.          PIPE_SHADER_FRAGMENT, state);
  730.  
  731.    /* states used in ilo_shader_variant_init() */
  732.    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS |
  733.                                         ILO_DIRTY_RASTERIZER |
  734.                                         ILO_DIRTY_FB |
  735.                                         ILO_DIRTY_CBUF;
  736.  
  737.    return shader;
  738. }
  739.  
  740. struct ilo_shader_state *
  741. ilo_shader_create_cs(const struct ilo_dev *dev,
  742.                      const struct pipe_compute_state *state,
  743.                      const struct ilo_state_vector *precompile)
  744. {
  745.    struct ilo_shader_state *shader;
  746.  
  747.    shader = ilo_shader_state_create(dev, precompile,
  748.          PIPE_SHADER_COMPUTE, state);
  749.  
  750.    shader->info.non_orthogonal_states = 0;
  751.  
  752.    return shader;
  753. }
  754.  
  755. /**
  756.  * Destroy a shader state.
  757.  */
  758. void
  759. ilo_shader_destroy(struct ilo_shader_state *shader)
  760. {
  761.    struct ilo_shader *sh, *next;
  762.  
  763.    LIST_FOR_EACH_ENTRY_SAFE(sh, next, &shader->variants, list)
  764.       ilo_shader_destroy_kernel(sh);
  765.  
  766.    FREE((struct tgsi_token *) shader->info.tokens);
  767.    FREE(shader);
  768. }
  769.  
  770. /**
  771.  * Return the type (PIPE_SHADER_x) of the shader.
  772.  */
  773. int
  774. ilo_shader_get_type(const struct ilo_shader_state *shader)
  775. {
  776.    return shader->info.type;
  777. }
  778.  
  779. /**
  780.  * Select a kernel for the given context.  This will compile a new kernel if
  781.  * none of the existing kernels work with the context.
  782.  *
  783.  * \param ilo the context
  784.  * \param dirty states of the context that are considered changed
  785.  * \return true if a different kernel is selected
  786.  */
  787. bool
  788. ilo_shader_select_kernel(struct ilo_shader_state *shader,
  789.                          const struct ilo_state_vector *vec,
  790.                          uint32_t dirty)
  791. {
  792.    const struct ilo_shader * const cur = shader->shader;
  793.    struct ilo_shader_variant variant;
  794.  
  795.    if (!(shader->info.non_orthogonal_states & dirty))
  796.       return false;
  797.  
  798.    ilo_shader_variant_init(&variant, &shader->info, vec);
  799.    ilo_shader_state_use_variant(shader, &variant);
  800.  
  801.    return (shader->shader != cur);
  802. }
  803.  
  804. static int
  805. route_attr(const int *semantics, const int *indices, int len,
  806.            int semantic, int index)
  807. {
  808.    int i;
  809.  
  810.    for (i = 0; i < len; i++) {
  811.       if (semantics[i] == semantic && indices[i] == index)
  812.          return i;
  813.    }
  814.  
  815.    /* failed to match for COLOR, try BCOLOR */
  816.    if (semantic == TGSI_SEMANTIC_COLOR) {
  817.       for (i = 0; i < len; i++) {
  818.          if (semantics[i] == TGSI_SEMANTIC_BCOLOR && indices[i] == index)
  819.             return i;
  820.       }
  821.    }
  822.  
  823.    return -1;
  824. }
  825.  
  826. /**
  827.  * Select a routing for the given source shader and rasterizer state.
  828.  *
  829.  * \return true if a different routing is selected
  830.  */
  831. bool
  832. ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
  833.                                  const struct ilo_shader_state *source,
  834.                                  const struct ilo_rasterizer_state *rasterizer)
  835. {
  836.    const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
  837.    const bool light_twoside = rasterizer->state.light_twoside;
  838.    struct ilo_shader *kernel = shader->shader;
  839.    struct ilo_kernel_routing *routing = &kernel->routing;
  840.    const int *src_semantics, *src_indices;
  841.    int src_len, max_src_slot;
  842.    int dst_len, dst_slot;
  843.  
  844.    /* we are constructing 3DSTATE_SBE here */
  845.    ILO_DEV_ASSERT(shader->info.dev, 6, 8);
  846.  
  847.    assert(kernel);
  848.  
  849.    if (source) {
  850.       assert(source->shader);
  851.       src_semantics = source->shader->out.semantic_names;
  852.       src_indices = source->shader->out.semantic_indices;
  853.       src_len = source->shader->out.count;
  854.    }
  855.    else {
  856.       src_semantics = kernel->in.semantic_names;
  857.       src_indices = kernel->in.semantic_indices;
  858.       src_len = kernel->in.count;
  859.    }
  860.  
  861.    /* no change */
  862.    if (kernel->routing_initialized &&
  863.        routing->source_skip + routing->source_len <= src_len &&
  864.        kernel->routing_sprite_coord_enable == sprite_coord_enable &&
  865.        !memcmp(kernel->routing_src_semantics,
  866.           &src_semantics[routing->source_skip],
  867.           sizeof(kernel->routing_src_semantics[0]) * routing->source_len) &&
  868.        !memcmp(kernel->routing_src_indices,
  869.           &src_indices[routing->source_skip],
  870.           sizeof(kernel->routing_src_indices[0]) * routing->source_len))
  871.       return false;
  872.  
  873.    if (source) {
  874.       /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
  875.       assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
  876.       assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
  877.       routing->source_skip = 2;
  878.  
  879.       routing->source_len = src_len - routing->source_skip;
  880.       src_semantics += routing->source_skip;
  881.       src_indices += routing->source_skip;
  882.    }
  883.    else {
  884.       routing->source_skip = 0;
  885.       routing->source_len = src_len;
  886.    }
  887.  
  888.    routing->const_interp_enable = kernel->in.const_interp_enable;
  889.    routing->point_sprite_enable = 0;
  890.    routing->swizzle_enable = false;
  891.  
  892.    assert(kernel->in.count <= Elements(routing->swizzles));
  893.    dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
  894.    max_src_slot = -1;
  895.  
  896.    for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
  897.       const int semantic = kernel->in.semantic_names[dst_slot];
  898.       const int index = kernel->in.semantic_indices[dst_slot];
  899.       int src_slot;
  900.  
  901.       if (semantic == TGSI_SEMANTIC_GENERIC &&
  902.           (sprite_coord_enable & (1 << index)))
  903.          routing->point_sprite_enable |= 1 << dst_slot;
  904.  
  905.       if (source) {
  906.          src_slot = route_attr(src_semantics, src_indices,
  907.                routing->source_len, semantic, index);
  908.  
  909.          /*
  910.           * The source shader stage does not output this attribute.  The value
  911.           * is supposed to be undefined, unless the attribute goes through
  912.           * point sprite replacement or the attribute is
  913.           * TGSI_SEMANTIC_POSITION.  In all cases, we do not care which source
  914.           * attribute is picked.
  915.           *
  916.           * We should update the kernel code and omit the output of
  917.           * TGSI_SEMANTIC_POSITION here.
  918.           */
  919.          if (src_slot < 0)
  920.             src_slot = 0;
  921.       }
  922.       else {
  923.          src_slot = dst_slot;
  924.       }
  925.  
  926.       routing->swizzles[dst_slot] = src_slot;
  927.  
  928.       /* use the following slot for two-sided lighting */
  929.       if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
  930.           src_slot + 1 < routing->source_len &&
  931.           src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
  932.           src_indices[src_slot + 1] == index) {
  933.          routing->swizzles[dst_slot] |= GEN8_SBE_SWIZ_INPUTATTR_FACING;
  934.          src_slot++;
  935.       }
  936.  
  937.       if (routing->swizzles[dst_slot] != dst_slot)
  938.          routing->swizzle_enable = true;
  939.  
  940.       if (max_src_slot < src_slot)
  941.          max_src_slot = src_slot;
  942.    }
  943.  
  944.    memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
  945.          sizeof(routing->swizzles[0]) * dst_slot);
  946.  
  947.    /*
  948.     * From the Sandy Bridge PRM, volume 2 part 1, page 248:
  949.     *
  950.     *     "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
  951.     *      0 indicating no Vertex URB data to be read.
  952.     *
  953.     *      This field should be set to the minimum length required to read the
  954.     *      maximum source attribute. The maximum source attribute is indicated
  955.     *      by the maximum value of the enabled Attribute # Source Attribute if
  956.     *      Attribute Swizzle Enable is set, Number of Output Attributes-1 if
  957.     *      enable is not set.
  958.     *
  959.     *        read_length = ceiling((max_source_attr+1)/2)
  960.     *
  961.     *      [errata] Corruption/Hang possible if length programmed larger than
  962.     *      recommended"
  963.     */
  964.    routing->source_len = max_src_slot + 1;
  965.  
  966.    /* remember the states of the source */
  967.    kernel->routing_initialized = true;
  968.    kernel->routing_sprite_coord_enable = sprite_coord_enable;
  969.    memcpy(kernel->routing_src_semantics, src_semantics,
  970.          sizeof(kernel->routing_src_semantics[0]) * routing->source_len);
  971.    memcpy(kernel->routing_src_indices, src_indices,
  972.          sizeof(kernel->routing_src_indices[0]) * routing->source_len);
  973.  
  974.    return true;
  975. }
  976.  
  977. /**
  978.  * Return the cache offset of the selected kernel.  This must be called after
  979.  * ilo_shader_select_kernel() and ilo_shader_cache_upload().
  980.  */
  981. uint32_t
  982. ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader)
  983. {
  984.    const struct ilo_shader *kernel = shader->shader;
  985.  
  986.    assert(kernel && kernel->uploaded);
  987.  
  988.    return kernel->cache_offset;
  989. }
  990.  
  991. /**
  992.  * Query a kernel parameter for the selected kernel.
  993.  */
  994. int
  995. ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
  996.                             enum ilo_kernel_param param)
  997. {
  998.    const struct ilo_shader *kernel = shader->shader;
  999.    int val;
  1000.  
  1001.    assert(kernel);
  1002.  
  1003.    switch (param) {
  1004.    case ILO_KERNEL_INPUT_COUNT:
  1005.       val = kernel->in.count;
  1006.       break;
  1007.    case ILO_KERNEL_OUTPUT_COUNT:
  1008.       val = kernel->out.count;
  1009.       break;
  1010.    case ILO_KERNEL_SAMPLER_COUNT:
  1011.       val = shader->info.num_samplers;
  1012.       break;
  1013.    case ILO_KERNEL_URB_DATA_START_REG:
  1014.       val = kernel->in.start_grf;
  1015.       break;
  1016.    case ILO_KERNEL_SKIP_CBUF0_UPLOAD:
  1017.       val = kernel->skip_cbuf0_upload;
  1018.       break;
  1019.    case ILO_KERNEL_PCB_CBUF0_SIZE:
  1020.       val = kernel->pcb.cbuf0_size;
  1021.       break;
  1022.  
  1023.    case ILO_KERNEL_SURFACE_TOTAL_COUNT:
  1024.       val = kernel->bt.total_count;
  1025.       break;
  1026.    case ILO_KERNEL_SURFACE_TEX_BASE:
  1027.       val = kernel->bt.tex_base;
  1028.       break;
  1029.    case ILO_KERNEL_SURFACE_TEX_COUNT:
  1030.       val = kernel->bt.tex_count;
  1031.       break;
  1032.    case ILO_KERNEL_SURFACE_CONST_BASE:
  1033.       val = kernel->bt.const_base;
  1034.       break;
  1035.    case ILO_KERNEL_SURFACE_CONST_COUNT:
  1036.       val = kernel->bt.const_count;
  1037.       break;
  1038.    case ILO_KERNEL_SURFACE_RES_BASE:
  1039.       val = kernel->bt.res_base;
  1040.       break;
  1041.    case ILO_KERNEL_SURFACE_RES_COUNT:
  1042.       val = kernel->bt.res_count;
  1043.       break;
  1044.  
  1045.    case ILO_KERNEL_VS_INPUT_INSTANCEID:
  1046.       val = shader->info.has_instanceid;
  1047.       break;
  1048.    case ILO_KERNEL_VS_INPUT_VERTEXID:
  1049.       val = shader->info.has_vertexid;
  1050.       break;
  1051.    case ILO_KERNEL_VS_INPUT_EDGEFLAG:
  1052.       if (shader->info.edgeflag_in >= 0) {
  1053.          /* we rely on the state tracker here */
  1054.          assert(shader->info.edgeflag_in == kernel->in.count - 1);
  1055.          val = true;
  1056.       }
  1057.       else {
  1058.          val = false;
  1059.       }
  1060.       break;
  1061.    case ILO_KERNEL_VS_PCB_UCP_SIZE:
  1062.       val = kernel->pcb.clip_state_size;
  1063.       break;
  1064.    case ILO_KERNEL_VS_GEN6_SO:
  1065.       val = kernel->stream_output;
  1066.       break;
  1067.    case ILO_KERNEL_VS_GEN6_SO_START_REG:
  1068.       val = kernel->gs_start_grf;
  1069.       break;
  1070.    case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET:
  1071.       val = kernel->gs_offsets[0];
  1072.       break;
  1073.    case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET:
  1074.       val = kernel->gs_offsets[1];
  1075.       break;
  1076.    case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET:
  1077.       val = kernel->gs_offsets[2];
  1078.       break;
  1079.    case ILO_KERNEL_VS_GEN6_SO_SURFACE_COUNT:
  1080.       val = kernel->gs_bt_so_count;
  1081.       break;
  1082.  
  1083.    case ILO_KERNEL_GS_DISCARD_ADJACENCY:
  1084.       val = kernel->in.discard_adj;
  1085.       break;
  1086.    case ILO_KERNEL_GS_GEN6_SVBI_POST_INC:
  1087.       val = kernel->svbi_post_inc;
  1088.       break;
  1089.    case ILO_KERNEL_GS_GEN6_SURFACE_SO_BASE:
  1090.       val = kernel->bt.gen6_so_base;
  1091.       break;
  1092.    case ILO_KERNEL_GS_GEN6_SURFACE_SO_COUNT:
  1093.       val = kernel->bt.gen6_so_count;
  1094.       break;
  1095.  
  1096.    case ILO_KERNEL_FS_INPUT_Z:
  1097.    case ILO_KERNEL_FS_INPUT_W:
  1098.       val = kernel->in.has_pos;
  1099.       break;
  1100.    case ILO_KERNEL_FS_OUTPUT_Z:
  1101.       val = kernel->out.has_pos;
  1102.       break;
  1103.    case ILO_KERNEL_FS_USE_KILL:
  1104.       val = kernel->has_kill;
  1105.       break;
  1106.    case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS:
  1107.       val = kernel->in.barycentric_interpolation_mode;
  1108.       break;
  1109.    case ILO_KERNEL_FS_DISPATCH_16_OFFSET:
  1110.       val = 0;
  1111.       break;
  1112.    case ILO_KERNEL_FS_SURFACE_RT_BASE:
  1113.       val = kernel->bt.rt_base;
  1114.       break;
  1115.    case ILO_KERNEL_FS_SURFACE_RT_COUNT:
  1116.       val = kernel->bt.rt_count;
  1117.       break;
  1118.  
  1119.    case ILO_KERNEL_CS_LOCAL_SIZE:
  1120.       val = shader->info.compute.req_local_mem;
  1121.       break;
  1122.    case ILO_KERNEL_CS_PRIVATE_SIZE:
  1123.       val = shader->info.compute.req_private_mem;
  1124.       break;
  1125.    case ILO_KERNEL_CS_INPUT_SIZE:
  1126.       val = shader->info.compute.req_input_mem;
  1127.       break;
  1128.    case ILO_KERNEL_CS_SIMD_SIZE:
  1129.       val = 16;
  1130.       break;
  1131.    case ILO_KERNEL_CS_SURFACE_GLOBAL_BASE:
  1132.       val = kernel->bt.global_base;
  1133.       break;
  1134.    case ILO_KERNEL_CS_SURFACE_GLOBAL_COUNT:
  1135.       val = kernel->bt.global_count;
  1136.       break;
  1137.  
  1138.    default:
  1139.       assert(!"unknown kernel parameter");
  1140.       val = 0;
  1141.       break;
  1142.    }
  1143.  
  1144.    return val;
  1145. }
  1146.  
  1147. /**
  1148.  * Return the CSO of the selected kernel.
  1149.  */
  1150. const struct ilo_shader_cso *
  1151. ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
  1152. {
  1153.    const struct ilo_shader *kernel = shader->shader;
  1154.  
  1155.    assert(kernel);
  1156.  
  1157.    return &kernel->cso;
  1158. }
  1159.  
  1160. /**
  1161.  * Return the SO info of the selected kernel.
  1162.  */
  1163. const struct pipe_stream_output_info *
  1164. ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
  1165. {
  1166.    const struct ilo_shader *kernel = shader->shader;
  1167.  
  1168.    assert(kernel);
  1169.  
  1170.    return &kernel->so_info;
  1171. }
  1172.  
  1173. /**
  1174.  * Return the routing info of the selected kernel.
  1175.  */
  1176. const struct ilo_kernel_routing *
  1177. ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
  1178. {
  1179.    const struct ilo_shader *kernel = shader->shader;
  1180.  
  1181.    assert(kernel);
  1182.  
  1183.    return &kernel->routing;
  1184. }
  1185.