Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2012-2013 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "tgsi/tgsi_parse.h"
  29. #include "intel_winsys.h"
  30. #include "brw_defines.h" /* for SBE setup */
  31.  
  32. #include "shader/ilo_shader_internal.h"
  33. #include "ilo_state.h"
  34. #include "ilo_shader.h"
  35.  
/* a cache that tracks managed shaders and uploads their kernels to a bo */
struct ilo_shader_cache {
   struct list_head shaders;   /* shaders whose variants are all uploaded */
   struct list_head changed;   /* shaders with variants pending upload */
};
  40.  
  41. /**
  42.  * Create a shader cache.  A shader cache can manage shaders and upload them
  43.  * to a bo as a whole.
  44.  */
  45. struct ilo_shader_cache *
  46. ilo_shader_cache_create(void)
  47. {
  48.    struct ilo_shader_cache *shc;
  49.  
  50.    shc = CALLOC_STRUCT(ilo_shader_cache);
  51.    if (!shc)
  52.       return NULL;
  53.  
  54.    list_inithead(&shc->shaders);
  55.    list_inithead(&shc->changed);
  56.  
  57.    return shc;
  58. }
  59.  
  60. /**
  61.  * Destroy a shader cache.
  62.  */
  63. void
  64. ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
  65. {
  66.    FREE(shc);
  67. }
  68.  
  69. /**
  70.  * Add a shader to the cache.
  71.  */
  72. void
  73. ilo_shader_cache_add(struct ilo_shader_cache *shc,
  74.                      struct ilo_shader_state *shader)
  75. {
  76.    struct ilo_shader *sh;
  77.  
  78.    shader->cache = shc;
  79.    LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
  80.       sh->uploaded = false;
  81.  
  82.    list_add(&shader->list, &shc->changed);
  83. }
  84.  
/**
 * Remove a shader from the cache.
 *
 * The shader is taken off whichever list it is currently on and loses its
 * back-pointer to the cache.
 */
void
ilo_shader_cache_remove(struct ilo_shader_cache *shc,
                        struct ilo_shader_state *shader)
{
   list_del(&shader->list);
   shader->cache = NULL;
}
  95.  
  96. /**
  97.  * Notify the cache that a managed shader has changed.
  98.  */
  99. static void
  100. ilo_shader_cache_notify_change(struct ilo_shader_cache *shc,
  101.                                struct ilo_shader_state *shader)
  102. {
  103.    if (shader->cache == shc) {
  104.       list_del(&shader->list);
  105.       list_add(&shader->list, &shc->changed);
  106.    }
  107. }
  108.  
  109. /**
  110.  * Upload a managed shader to the bo.
  111.  */
  112. static int
  113. ilo_shader_cache_upload_shader(struct ilo_shader_cache *shc,
  114.                                struct ilo_shader_state *shader,
  115.                                struct intel_bo *bo, unsigned offset,
  116.                                bool incremental)
  117. {
  118.    const unsigned base = offset;
  119.    struct ilo_shader *sh;
  120.  
  121.    LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
  122.       int err;
  123.  
  124.       if (incremental && sh->uploaded)
  125.          continue;
  126.  
  127.       /* kernels must be aligned to 64-byte */
  128.       offset = align(offset, 64);
  129.  
  130.       err = intel_bo_pwrite(bo, offset, sh->kernel_size, sh->kernel);
  131.       if (unlikely(err))
  132.          return -1;
  133.  
  134.       sh->uploaded = true;
  135.       sh->cache_offset = offset;
  136.  
  137.       offset += sh->kernel_size;
  138.    }
  139.  
  140.    return (int) (offset - base);
  141. }
  142.  
  143. /**
  144.  * Similar to ilo_shader_cache_upload(), except no upload happens.
  145.  */
  146. static int
  147. ilo_shader_cache_get_upload_size(struct ilo_shader_cache *shc,
  148.                                  unsigned offset,
  149.                                  bool incremental)
  150. {
  151.    const unsigned base = offset;
  152.    struct ilo_shader_state *shader;
  153.  
  154.    if (!incremental) {
  155.       LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) {
  156.          struct ilo_shader *sh;
  157.  
  158.          /* see ilo_shader_cache_upload_shader() */
  159.          LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
  160.             if (!incremental || !sh->uploaded)
  161.                offset = align(offset, 64) + sh->kernel_size;
  162.          }
  163.       }
  164.    }
  165.  
  166.    LIST_FOR_EACH_ENTRY(shader, &shc->changed, list) {
  167.       struct ilo_shader *sh;
  168.  
  169.       /* see ilo_shader_cache_upload_shader() */
  170.       LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
  171.          if (!incremental || !sh->uploaded)
  172.             offset = align(offset, 64) + sh->kernel_size;
  173.       }
  174.    }
  175.  
  176.    /*
  177.     * From the Sandy Bridge PRM, volume 4 part 2, page 112:
  178.     *
  179.     *     "Due to prefetch of the instruction stream, the EUs may attempt to
  180.     *      access up to 8 instructions (128 bytes) beyond the end of the
  181.     *      kernel program - possibly into the next memory page.  Although
  182.     *      these instructions will not be executed, software must account for
  183.     *      the prefetch in order to avoid invalid page access faults."
  184.     */
  185.    if (offset > base)
  186.       offset += 128;
  187.  
  188.    return (int) (offset - base);
  189. }
  190.  
  191. /**
  192.  * Upload managed shaders to the bo.  When incremental is true, only shaders
  193.  * that are changed or added after the last upload are uploaded.
  194.  */
  195. int
  196. ilo_shader_cache_upload(struct ilo_shader_cache *shc,
  197.                         struct intel_bo *bo, unsigned offset,
  198.                         bool incremental)
  199. {
  200.    struct ilo_shader_state *shader, *next;
  201.    int size = 0, s;
  202.  
  203.    if (!bo)
  204.       return ilo_shader_cache_get_upload_size(shc, offset, incremental);
  205.  
  206.    if (!incremental) {
  207.       LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) {
  208.          s = ilo_shader_cache_upload_shader(shc, shader,
  209.                bo, offset, incremental);
  210.          if (unlikely(s < 0))
  211.             return s;
  212.  
  213.          size += s;
  214.          offset += s;
  215.       }
  216.    }
  217.  
  218.    LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->changed, list) {
  219.       s = ilo_shader_cache_upload_shader(shc, shader,
  220.             bo, offset, incremental);
  221.       if (unlikely(s < 0))
  222.          return s;
  223.  
  224.       size += s;
  225.       offset += s;
  226.  
  227.       list_del(&shader->list);
  228.       list_add(&shader->list, &shc->shaders);
  229.    }
  230.  
  231.    return size;
  232. }
  233.  
/**
 * Initialize a shader variant from the current context states.
 *
 * The fields consulted here determine when a recompile is needed; they must
 * stay in sync with the non_orthogonal_states flags set by the
 * ilo_shader_create_*() functions.
 */
void
ilo_shader_variant_init(struct ilo_shader_variant *variant,
                        const struct ilo_shader_info *info,
                        const struct ilo_context *ilo)
{
   int num_views, i;

   memset(variant, 0, sizeof(*variant));

   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      variant->u.vs.rasterizer_discard =
         ilo->rasterizer->state.rasterizer_discard;
      variant->u.vs.num_ucps =
         util_last_bit(ilo->rasterizer->state.clip_plane_enable);
      break;
   case PIPE_SHADER_GEOMETRY:
      variant->u.gs.rasterizer_discard =
         ilo->rasterizer->state.rasterizer_discard;
      /* the GS inputs mirror the currently bound VS outputs */
      variant->u.gs.num_inputs = ilo->vs->shader->out.count;
      for (i = 0; i < ilo->vs->shader->out.count; i++) {
         variant->u.gs.semantic_names[i] =
            ilo->vs->shader->out.semantic_names[i];
         variant->u.gs.semantic_indices[i] =
            ilo->vs->shader->out.semantic_indices[i];
      }
      break;
   case PIPE_SHADER_FRAGMENT:
      variant->u.fs.flatshade =
         (info->has_color_interp && ilo->rasterizer->state.flatshade);
      /* only relevant when the shader reads the position input */
      variant->u.fs.fb_height = (info->has_pos) ?
         ilo->fb.state.height : 1;
      variant->u.fs.num_cbufs = ilo->fb.state.nr_cbufs;
      break;
   default:
      assert(!"unknown shader type");
      break;
   }

   num_views = ilo->view[info->type].count;
   assert(info->num_samplers <= num_views);

   variant->num_sampler_views = info->num_samplers;
   for (i = 0; i < info->num_samplers; i++) {
      const struct pipe_sampler_view *view = ilo->view[info->type].states[i];
      const struct ilo_sampler_cso *sampler = ilo->sampler[info->type].cso[i];

      if (view) {
         /* honor the swizzles of the bound sampler view */
         variant->sampler_view_swizzles[i].r = view->swizzle_r;
         variant->sampler_view_swizzles[i].g = view->swizzle_g;
         variant->sampler_view_swizzles[i].b = view->swizzle_b;
         variant->sampler_view_swizzles[i].a = view->swizzle_a;
      }
      else if (info->shadow_samplers & (1 << i)) {
         /* no view bound: broadcast the shadow result to RGB, force A to 1 */
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
      }
      else {
         /* identity swizzle */
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
      }

      /*
       * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
       * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
       * to manually saturate the texture coordinates.
       */
      if (sampler) {
         variant->saturate_tex_coords[0] |= sampler->saturate_s << i;
         variant->saturate_tex_coords[1] |= sampler->saturate_t << i;
         variant->saturate_tex_coords[2] |= sampler->saturate_r << i;
      }
   }
}
  315.  
  316. /**
  317.  * Guess the shader variant, knowing that the context may still change.
  318.  */
  319. static void
  320. ilo_shader_variant_guess(struct ilo_shader_variant *variant,
  321.                          const struct ilo_shader_info *info,
  322.                          const struct ilo_context *ilo)
  323. {
  324.    int i;
  325.  
  326.    memset(variant, 0, sizeof(*variant));
  327.  
  328.    switch (info->type) {
  329.    case PIPE_SHADER_VERTEX:
  330.       break;
  331.    case PIPE_SHADER_GEOMETRY:
  332.       break;
  333.    case PIPE_SHADER_FRAGMENT:
  334.       variant->u.fs.flatshade = false;
  335.       variant->u.fs.fb_height = (info->has_pos) ?
  336.          ilo->fb.state.height : 1;
  337.       variant->u.fs.num_cbufs = 1;
  338.       break;
  339.    default:
  340.       assert(!"unknown shader type");
  341.       break;
  342.    }
  343.  
  344.    variant->num_sampler_views = info->num_samplers;
  345.    for (i = 0; i < info->num_samplers; i++) {
  346.       if (info->shadow_samplers & (1 << i)) {
  347.          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
  348.          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
  349.          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
  350.          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
  351.       }
  352.       else {
  353.          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
  354.          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
  355.          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
  356.          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
  357.       }
  358.    }
  359. }
  360.  
  361.  
  362. /**
  363.  * Parse a TGSI instruction for the shader info.
  364.  */
  365. static void
  366. ilo_shader_info_parse_inst(struct ilo_shader_info *info,
  367.                            const struct tgsi_full_instruction *inst)
  368. {
  369.    int i;
  370.  
  371.    /* look for edgeflag passthrough */
  372.    if (info->edgeflag_out >= 0 &&
  373.        inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
  374.        inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
  375.        inst->Dst[0].Register.Index == info->edgeflag_out) {
  376.  
  377.       assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
  378.       info->edgeflag_in = inst->Src[0].Register.Index;
  379.    }
  380.  
  381.    if (inst->Instruction.Texture) {
  382.       bool shadow;
  383.  
  384.       switch (inst->Texture.Texture) {
  385.       case TGSI_TEXTURE_SHADOW1D:
  386.       case TGSI_TEXTURE_SHADOW2D:
  387.       case TGSI_TEXTURE_SHADOWRECT:
  388.       case TGSI_TEXTURE_SHADOW1D_ARRAY:
  389.       case TGSI_TEXTURE_SHADOW2D_ARRAY:
  390.       case TGSI_TEXTURE_SHADOWCUBE:
  391.       case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
  392.          shadow = true;
  393.          break;
  394.       default:
  395.          shadow = false;
  396.          break;
  397.       }
  398.  
  399.       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
  400.          const struct tgsi_full_src_register *src = &inst->Src[i];
  401.  
  402.          if (src->Register.File == TGSI_FILE_SAMPLER) {
  403.             const int idx = src->Register.Index;
  404.  
  405.             if (idx >= info->num_samplers)
  406.                info->num_samplers = idx + 1;
  407.  
  408.             if (shadow)
  409.                info->shadow_samplers |= 1 << idx;
  410.          }
  411.       }
  412.    }
  413. }
  414.  
  415. /**
  416.  * Parse a TGSI property for the shader info.
  417.  */
  418. static void
  419. ilo_shader_info_parse_prop(struct ilo_shader_info *info,
  420.                            const struct tgsi_full_property *prop)
  421. {
  422.    switch (prop->Property.PropertyName) {
  423.    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
  424.       info->fs_color0_writes_all_cbufs = prop->u[0].Data;
  425.       break;
  426.    default:
  427.       break;
  428.    }
  429. }
  430.  
  431. /**
  432.  * Parse a TGSI declaration for the shader info.
  433.  */
  434. static void
  435. ilo_shader_info_parse_decl(struct ilo_shader_info *info,
  436.                            const struct tgsi_full_declaration *decl)
  437. {
  438.    switch (decl->Declaration.File) {
  439.    case TGSI_FILE_INPUT:
  440.       if (decl->Declaration.Interpolate &&
  441.           decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR)
  442.          info->has_color_interp = true;
  443.       if (decl->Declaration.Semantic &&
  444.           decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
  445.          info->has_pos = true;
  446.       break;
  447.    case TGSI_FILE_OUTPUT:
  448.       if (decl->Declaration.Semantic &&
  449.           decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG)
  450.          info->edgeflag_out = decl->Range.First;
  451.       break;
  452.    case TGSI_FILE_SYSTEM_VALUE:
  453.       if (decl->Declaration.Semantic &&
  454.           decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID)
  455.          info->has_instanceid = true;
  456.       if (decl->Declaration.Semantic &&
  457.           decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
  458.          info->has_vertexid = true;
  459.       break;
  460.    default:
  461.       break;
  462.    }
  463. }
  464.  
  465. static void
  466. ilo_shader_info_parse_tokens(struct ilo_shader_info *info)
  467. {
  468.    struct tgsi_parse_context parse;
  469.  
  470.    info->edgeflag_in = -1;
  471.    info->edgeflag_out = -1;
  472.  
  473.    tgsi_parse_init(&parse, info->tokens);
  474.    while (!tgsi_parse_end_of_tokens(&parse)) {
  475.       const union tgsi_full_token *token;
  476.  
  477.       tgsi_parse_token(&parse);
  478.       token = &parse.FullToken;
  479.  
  480.       switch (token->Token.Type) {
  481.       case TGSI_TOKEN_TYPE_DECLARATION:
  482.          ilo_shader_info_parse_decl(info, &token->FullDeclaration);
  483.          break;
  484.       case TGSI_TOKEN_TYPE_INSTRUCTION:
  485.          ilo_shader_info_parse_inst(info, &token->FullInstruction);
  486.          break;
  487.       case TGSI_TOKEN_TYPE_PROPERTY:
  488.          ilo_shader_info_parse_prop(info, &token->FullProperty);
  489.          break;
  490.       default:
  491.          break;
  492.       }
  493.    }
  494.    tgsi_parse_free(&parse);
  495. }
  496.  
  497. /**
  498.  * Create a shader state.
  499.  */
  500. static struct ilo_shader_state *
  501. ilo_shader_state_create(const struct ilo_context *ilo,
  502.                         int type, const void *templ)
  503. {
  504.    struct ilo_shader_state *state;
  505.    struct ilo_shader_variant variant;
  506.  
  507.    state = CALLOC_STRUCT(ilo_shader_state);
  508.    if (!state)
  509.       return NULL;
  510.  
  511.    state->info.dev = ilo->dev;
  512.    state->info.type = type;
  513.  
  514.    if (type == PIPE_SHADER_COMPUTE) {
  515.       const struct pipe_compute_state *c =
  516.          (const struct pipe_compute_state *) templ;
  517.  
  518.       state->info.tokens = tgsi_dup_tokens(c->prog);
  519.       state->info.compute.req_local_mem = c->req_local_mem;
  520.       state->info.compute.req_private_mem = c->req_private_mem;
  521.       state->info.compute.req_input_mem = c->req_input_mem;
  522.    }
  523.    else {
  524.       const struct pipe_shader_state *s =
  525.          (const struct pipe_shader_state *) templ;
  526.  
  527.       state->info.tokens = tgsi_dup_tokens(s->tokens);
  528.       state->info.stream_output = s->stream_output;
  529.    }
  530.  
  531.    list_inithead(&state->variants);
  532.  
  533.    ilo_shader_info_parse_tokens(&state->info);
  534.  
  535.    /* guess and compile now */
  536.    ilo_shader_variant_guess(&variant, &state->info, ilo);
  537.    if (!ilo_shader_state_use_variant(state, &variant)) {
  538.       ilo_shader_destroy(state);
  539.       return NULL;
  540.    }
  541.  
  542.    return state;
  543. }
  544.  
  545. /**
  546.  * Add a compiled shader to the shader state.
  547.  */
  548. static void
  549. ilo_shader_state_add_shader(struct ilo_shader_state *state,
  550.                             struct ilo_shader *sh)
  551. {
  552.    list_add(&sh->list, &state->variants);
  553.    state->num_variants++;
  554.    state->total_size += sh->kernel_size;
  555.  
  556.    if (state->cache)
  557.       ilo_shader_cache_notify_change(state->cache, state);
  558. }
  559.  
/**
 * Remove a compiled shader from the shader state, updating the variant
 * count and the total kernel size bookkeeping.
 */
static void
ilo_shader_state_remove_shader(struct ilo_shader_state *state,
                               struct ilo_shader *sh)
{
   list_del(&sh->list);
   state->num_variants--;
   state->total_size -= sh->kernel_size;
}
  571.  
  572. /**
  573.  * Garbage collect shader variants in the shader state.
  574.  */
  575. static void
  576. ilo_shader_state_gc(struct ilo_shader_state *state)
  577. {
  578.    /* activate when the variants take up more than 4KiB of space */
  579.    const int limit = 4 * 1024;
  580.    struct ilo_shader *sh, *next;
  581.  
  582.    if (state->total_size < limit)
  583.       return;
  584.  
  585.    /* remove from the tail as the most recently ones are at the head */
  586.    LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) {
  587.       ilo_shader_state_remove_shader(state, sh);
  588.       ilo_shader_destroy_kernel(sh);
  589.  
  590.       if (state->total_size <= limit / 2)
  591.          break;
  592.    }
  593. }
  594.  
  595. /**
  596.  * Search for a shader variant.
  597.  */
  598. static struct ilo_shader *
  599. ilo_shader_state_search_variant(struct ilo_shader_state *state,
  600.                                 const struct ilo_shader_variant *variant)
  601. {
  602.    struct ilo_shader *sh = NULL, *tmp;
  603.  
  604.    LIST_FOR_EACH_ENTRY(tmp, &state->variants, list) {
  605.       if (memcmp(&tmp->variant, variant, sizeof(*variant)) == 0) {
  606.          sh = tmp;
  607.          break;
  608.       }
  609.    }
  610.  
  611.    return sh;
  612. }
  613.  
  614. static void
  615. copy_so_info(struct ilo_shader *sh,
  616.              const struct pipe_stream_output_info *so_info)
  617. {
  618.    unsigned i, attr;
  619.  
  620.    if (!so_info->num_outputs)
  621.       return;
  622.  
  623.    sh->so_info = *so_info;
  624.  
  625.    for (i = 0; i < so_info->num_outputs; i++) {
  626.       /* figure out which attribute is sourced */
  627.       for (attr = 0; attr < sh->out.count; attr++) {
  628.          const int reg_idx = sh->out.register_indices[attr];
  629.          if (reg_idx == so_info->output[i].register_index)
  630.             break;
  631.       }
  632.  
  633.       if (attr < sh->out.count) {
  634.          sh->so_info.output[i].register_index = attr;
  635.       }
  636.       else {
  637.          assert(!"stream output an undefined register");
  638.          sh->so_info.output[i].register_index = 0;
  639.       }
  640.  
  641.       /* PSIZE is at W channel */
  642.       if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
  643.          assert(so_info->output[i].start_component == 0);
  644.          assert(so_info->output[i].num_components == 1);
  645.          sh->so_info.output[i].start_component = 3;
  646.       }
  647.    }
  648. }
  649.  
  650. /**
  651.  * Add a shader variant to the shader state.
  652.  */
  653. static struct ilo_shader *
  654. ilo_shader_state_add_variant(struct ilo_shader_state *state,
  655.                              const struct ilo_shader_variant *variant)
  656. {
  657.    struct ilo_shader *sh;
  658.  
  659.    switch (state->info.type) {
  660.    case PIPE_SHADER_VERTEX:
  661.       sh = ilo_shader_compile_vs(state, variant);
  662.       break;
  663.    case PIPE_SHADER_FRAGMENT:
  664.       sh = ilo_shader_compile_fs(state, variant);
  665.       break;
  666.    case PIPE_SHADER_GEOMETRY:
  667.       sh = ilo_shader_compile_gs(state, variant);
  668.       break;
  669.    case PIPE_SHADER_COMPUTE:
  670.       sh = ilo_shader_compile_cs(state, variant);
  671.       break;
  672.    default:
  673.       sh = NULL;
  674.       break;
  675.    }
  676.    if (!sh) {
  677.       assert(!"failed to compile shader");
  678.       return NULL;
  679.    }
  680.  
  681.    sh->variant = *variant;
  682.  
  683.    copy_so_info(sh, &state->info.stream_output);
  684.  
  685.    ilo_shader_state_add_shader(state, sh);
  686.  
  687.    return sh;
  688. }
  689.  
  690. /**
  691.  * Update state->shader to point to a variant.  If the variant does not exist,
  692.  * it will be added first.
  693.  */
  694. bool
  695. ilo_shader_state_use_variant(struct ilo_shader_state *state,
  696.                              const struct ilo_shader_variant *variant)
  697. {
  698.    struct ilo_shader *sh;
  699.    bool construct_cso = false;
  700.  
  701.    sh = ilo_shader_state_search_variant(state, variant);
  702.    if (!sh) {
  703.       ilo_shader_state_gc(state);
  704.  
  705.       sh = ilo_shader_state_add_variant(state, variant);
  706.       if (!sh)
  707.          return false;
  708.  
  709.       construct_cso = true;
  710.    }
  711.  
  712.    /* move to head */
  713.    if (state->variants.next != &sh->list) {
  714.       list_del(&sh->list);
  715.       list_add(&sh->list, &state->variants);
  716.    }
  717.  
  718.    state->shader = sh;
  719.  
  720.    if (construct_cso) {
  721.       switch (state->info.type) {
  722.       case PIPE_SHADER_VERTEX:
  723.          ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
  724.          break;
  725.       case PIPE_SHADER_GEOMETRY:
  726.          ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
  727.          break;
  728.       case PIPE_SHADER_FRAGMENT:
  729.          ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso);
  730.          break;
  731.       default:
  732.          break;
  733.       }
  734.    }
  735.  
  736.    return true;
  737. }
  738.  
  739. struct ilo_shader_state *
  740. ilo_shader_create_vs(const struct ilo_dev_info *dev,
  741.                      const struct pipe_shader_state *state,
  742.                      const struct ilo_context *precompile)
  743. {
  744.    struct ilo_shader_state *shader;
  745.  
  746.    shader = ilo_shader_state_create(precompile, PIPE_SHADER_VERTEX, state);
  747.  
  748.    /* states used in ilo_shader_variant_init() */
  749.    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS |
  750.                                         ILO_DIRTY_RASTERIZER;
  751.  
  752.    return shader;
  753. }
  754.  
  755. struct ilo_shader_state *
  756. ilo_shader_create_gs(const struct ilo_dev_info *dev,
  757.                      const struct pipe_shader_state *state,
  758.                      const struct ilo_context *precompile)
  759. {
  760.    struct ilo_shader_state *shader;
  761.  
  762.    shader = ilo_shader_state_create(precompile, PIPE_SHADER_GEOMETRY, state);
  763.  
  764.    /* states used in ilo_shader_variant_init() */
  765.    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS |
  766.                                         ILO_DIRTY_VS |
  767.                                         ILO_DIRTY_RASTERIZER;
  768.  
  769.    return shader;
  770. }
  771.  
  772. struct ilo_shader_state *
  773. ilo_shader_create_fs(const struct ilo_dev_info *dev,
  774.                      const struct pipe_shader_state *state,
  775.                      const struct ilo_context *precompile)
  776. {
  777.    struct ilo_shader_state *shader;
  778.  
  779.    shader = ilo_shader_state_create(precompile, PIPE_SHADER_FRAGMENT, state);
  780.  
  781.    /* states used in ilo_shader_variant_init() */
  782.    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS |
  783.                                         ILO_DIRTY_RASTERIZER |
  784.                                         ILO_DIRTY_FB;
  785.  
  786.    return shader;
  787. }
  788.  
  789. struct ilo_shader_state *
  790. ilo_shader_create_cs(const struct ilo_dev_info *dev,
  791.                      const struct pipe_compute_state *state,
  792.                      const struct ilo_context *precompile)
  793. {
  794.    struct ilo_shader_state *shader;
  795.  
  796.    shader = ilo_shader_state_create(precompile, PIPE_SHADER_COMPUTE, state);
  797.  
  798.    shader->info.non_orthogonal_states = 0;
  799.  
  800.    return shader;
  801. }
  802.  
/**
 * Destroy a shader state, releasing all compiled variants and the token
 * copy made by ilo_shader_state_create().
 */
void
ilo_shader_destroy(struct ilo_shader_state *shader)
{
   struct ilo_shader *sh, *next;

   LIST_FOR_EACH_ENTRY_SAFE(sh, next, &shader->variants, list)
      ilo_shader_destroy_kernel(sh);

   /* the tokens were duplicated at creation time; cast away the const */
   FREE((struct tgsi_token *) shader->info.tokens);
   FREE(shader);
}
  817.  
/**
 * Return the type (PIPE_SHADER_x) of the shader.
 *
 * \return one of the PIPE_SHADER_x values stored at creation time
 */
int
ilo_shader_get_type(const struct ilo_shader_state *shader)
{
   return shader->info.type;
}
  826.  
/**
 * Select a kernel for the given context.  This will compile a new kernel if
 * none of the existing kernels work with the context.
 *
 * \param ilo the context
 * \param dirty states of the context that are considered changed
 * \return true if a different kernel is selected
 */
bool
ilo_shader_select_kernel(struct ilo_shader_state *shader,
                         const struct ilo_context *ilo,
                         uint32_t dirty)
{
   const struct ilo_shader * const cur = shader->shader;
   struct ilo_shader_variant variant;

   /* no state this shader depends on has changed */
   if (!(shader->info.non_orthogonal_states & dirty))
      return false;

   ilo_shader_variant_init(&variant, &shader->info, ilo);
   ilo_shader_state_use_variant(shader, &variant);

   /* use_variant() updated shader->shader; compare against the old one */
   return (shader->shader != cur);
}
  851.  
  852. static int
  853. route_attr(const int *semantics, const int *indices, int len,
  854.            int semantic, int index)
  855. {
  856.    int i;
  857.  
  858.    for (i = 0; i < len; i++) {
  859.       if (semantics[i] == semantic && indices[i] == index)
  860.          return i;
  861.    }
  862.  
  863.    /* failed to match for COLOR, try BCOLOR */
  864.    if (semantic == TGSI_SEMANTIC_COLOR) {
  865.       for (i = 0; i < len; i++) {
  866.          if (semantics[i] == TGSI_SEMANTIC_BCOLOR && indices[i] == index)
  867.             return i;
  868.       }
  869.    }
  870.  
  871.    return -1;
  872. }
  873.  
  874. /**
  875.  * Select a routing for the given source shader and rasterizer state.
  876.  *
  877.  * \return true if a different routing is selected
  878.  */
  879. bool
  880. ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
  881.                                  const struct ilo_shader_state *source,
  882.                                  const struct ilo_rasterizer_state *rasterizer)
  883. {
  884.    const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
  885.    const bool light_twoside = rasterizer->state.light_twoside;
  886.    struct ilo_shader *kernel = shader->shader;
  887.    struct ilo_kernel_routing *routing = &kernel->routing;
  888.    const int *src_semantics, *src_indices;
  889.    int src_len, max_src_slot;
  890.    int dst_len, dst_slot;
  891.  
  892.    /* we are constructing 3DSTATE_SBE here */
  893.    assert(shader->info.dev->gen >= ILO_GEN(6) &&
  894.           shader->info.dev->gen <= ILO_GEN(7));
  895.  
  896.    assert(kernel);
  897.  
  898.    if (source) {
  899.       assert(source->shader);
  900.       src_semantics = source->shader->out.semantic_names;
  901.       src_indices = source->shader->out.semantic_indices;
  902.       src_len = source->shader->out.count;
  903.    }
  904.    else {
  905.       src_semantics = kernel->in.semantic_names;
  906.       src_indices = kernel->in.semantic_indices;
  907.       src_len = kernel->in.count;
  908.    }
  909.  
  910.    /* no change */
  911.    if (kernel->routing_initialized &&
  912.        routing->source_skip + routing->source_len <= src_len &&
  913.        kernel->routing_sprite_coord_enable == sprite_coord_enable &&
  914.        !memcmp(kernel->routing_src_semantics,
  915.           &src_semantics[routing->source_skip],
  916.           sizeof(kernel->routing_src_semantics[0]) * routing->source_len) &&
  917.        !memcmp(kernel->routing_src_indices,
  918.           &src_indices[routing->source_skip],
  919.           sizeof(kernel->routing_src_indices[0]) * routing->source_len))
  920.       return false;
  921.  
  922.    if (source) {
  923.       /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
  924.       assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
  925.       assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
  926.       routing->source_skip = 2;
  927.  
  928.       routing->source_len = src_len - routing->source_skip;
  929.       src_semantics += routing->source_skip;
  930.       src_indices += routing->source_skip;
  931.    }
  932.    else {
  933.       routing->source_skip = 0;
  934.       routing->source_len = src_len;
  935.    }
  936.  
  937.    routing->const_interp_enable = kernel->in.const_interp_enable;
  938.    routing->point_sprite_enable = 0;
  939.    routing->swizzle_enable = false;
  940.  
  941.    assert(kernel->in.count <= Elements(routing->swizzles));
  942.    dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
  943.    max_src_slot = -1;
  944.  
  945.    for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
  946.       const int semantic = kernel->in.semantic_names[dst_slot];
  947.       const int index = kernel->in.semantic_indices[dst_slot];
  948.       int src_slot;
  949.  
  950.       if (semantic == TGSI_SEMANTIC_GENERIC &&
  951.           (sprite_coord_enable & (1 << index)))
  952.          routing->point_sprite_enable |= 1 << dst_slot;
  953.  
  954.       if (source) {
  955.          src_slot = route_attr(src_semantics, src_indices,
  956.                routing->source_len, semantic, index);
  957.  
  958.          /*
  959.           * The source shader stage does not output this attribute.  The value
  960.           * is supposed to be undefined, unless the attribute goes through
  961.           * point sprite replacement or the attribute is
  962.           * TGSI_SEMANTIC_POSITION.  In all cases, we do not care which source
  963.           * attribute is picked.
  964.           *
  965.           * We should update the kernel code and omit the output of
  966.           * TGSI_SEMANTIC_POSITION here.
  967.           */
  968.          if (src_slot < 0)
  969.             src_slot = 0;
  970.       }
  971.       else {
  972.          src_slot = dst_slot;
  973.       }
  974.  
  975.       routing->swizzles[dst_slot] = src_slot;
  976.  
  977.       /* use the following slot for two-sided lighting */
  978.       if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
  979.           src_slot + 1 < routing->source_len &&
  980.           src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
  981.           src_indices[src_slot + 1] == index) {
  982.          routing->swizzles[dst_slot] |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
  983.             ATTRIBUTE_SWIZZLE_SHIFT;
  984.          src_slot++;
  985.       }
  986.  
  987.       if (routing->swizzles[dst_slot] != dst_slot)
  988.          routing->swizzle_enable = true;
  989.  
  990.       if (max_src_slot < src_slot)
  991.          max_src_slot = src_slot;
  992.    }
  993.  
  994.    memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
  995.          sizeof(routing->swizzles[0]) * dst_slot);
  996.  
  997.    /*
  998.     * From the Sandy Bridge PRM, volume 2 part 1, page 248:
  999.     *
  1000.     *     "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
  1001.     *      0 indicating no Vertex URB data to be read.
  1002.     *
  1003.     *      This field should be set to the minimum length required to read the
  1004.     *      maximum source attribute. The maximum source attribute is indicated
  1005.     *      by the maximum value of the enabled Attribute # Source Attribute if
  1006.     *      Attribute Swizzle Enable is set, Number of Output Attributes-1 if
  1007.     *      enable is not set.
  1008.     *
  1009.     *        read_length = ceiling((max_source_attr+1)/2)
  1010.     *
  1011.     *      [errata] Corruption/Hang possible if length programmed larger than
  1012.     *      recommended"
  1013.     */
  1014.    routing->source_len = max_src_slot + 1;
  1015.  
  1016.    /* remember the states of the source */
  1017.    kernel->routing_initialized = true;
  1018.    kernel->routing_sprite_coord_enable = sprite_coord_enable;
  1019.    memcpy(kernel->routing_src_semantics, src_semantics,
  1020.          sizeof(kernel->routing_src_semantics[0]) * routing->source_len);
  1021.    memcpy(kernel->routing_src_indices, src_indices,
  1022.          sizeof(kernel->routing_src_indices[0]) * routing->source_len);
  1023.  
  1024.    return true;
  1025. }
  1026.  
  1027. /**
  1028.  * Return the cache offset of the selected kernel.  This must be called after
  1029.  * ilo_shader_select_kernel() and ilo_shader_cache_upload().
  1030.  */
  1031. uint32_t
  1032. ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader)
  1033. {
  1034.    const struct ilo_shader *kernel = shader->shader;
  1035.  
  1036.    assert(kernel && kernel->uploaded);
  1037.  
  1038.    return kernel->cache_offset;
  1039. }
  1040.  
  1041. /**
  1042.  * Query a kernel parameter for the selected kernel.
  1043.  */
  1044. int
  1045. ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
  1046.                             enum ilo_kernel_param param)
  1047. {
  1048.    const struct ilo_shader *kernel = shader->shader;
  1049.    int val;
  1050.  
  1051.    assert(kernel);
  1052.  
  1053.    switch (param) {
  1054.    case ILO_KERNEL_INPUT_COUNT:
  1055.       val = kernel->in.count;
  1056.       break;
  1057.    case ILO_KERNEL_OUTPUT_COUNT:
  1058.       val = kernel->out.count;
  1059.       break;
  1060.    case ILO_KERNEL_URB_DATA_START_REG:
  1061.       val = kernel->in.start_grf;
  1062.       break;
  1063.  
  1064.    case ILO_KERNEL_VS_INPUT_INSTANCEID:
  1065.       val = shader->info.has_instanceid;
  1066.       break;
  1067.    case ILO_KERNEL_VS_INPUT_VERTEXID:
  1068.       val = shader->info.has_vertexid;
  1069.       break;
  1070.    case ILO_KERNEL_VS_INPUT_EDGEFLAG:
  1071.       if (shader->info.edgeflag_in >= 0) {
  1072.          /* we rely on the state tracker here */
  1073.          assert(shader->info.edgeflag_in == kernel->in.count - 1);
  1074.          val = true;
  1075.       }
  1076.       else {
  1077.          val = false;
  1078.       }
  1079.       break;
  1080.    case ILO_KERNEL_VS_PCB_UCP_SIZE:
  1081.       val = kernel->pcb.clip_state_size;
  1082.       break;
  1083.    case ILO_KERNEL_VS_GEN6_SO:
  1084.       val = kernel->stream_output;
  1085.       break;
  1086.    case ILO_KERNEL_VS_GEN6_SO_START_REG:
  1087.       val = kernel->gs_start_grf;
  1088.       break;
  1089.    case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET:
  1090.       val = kernel->gs_offsets[0];
  1091.       break;
  1092.    case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET:
  1093.       val = kernel->gs_offsets[1];
  1094.       break;
  1095.    case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET:
  1096.       val = kernel->gs_offsets[2];
  1097.       break;
  1098.  
  1099.    case ILO_KERNEL_GS_DISCARD_ADJACENCY:
  1100.       val = kernel->in.discard_adj;
  1101.       break;
  1102.    case ILO_KERNEL_GS_GEN6_SVBI_POST_INC:
  1103.       val = kernel->svbi_post_inc;
  1104.       break;
  1105.  
  1106.    case ILO_KERNEL_FS_INPUT_Z:
  1107.    case ILO_KERNEL_FS_INPUT_W:
  1108.       val = kernel->in.has_pos;
  1109.       break;
  1110.    case ILO_KERNEL_FS_OUTPUT_Z:
  1111.       val = kernel->out.has_pos;
  1112.       break;
  1113.    case ILO_KERNEL_FS_USE_KILL:
  1114.       val = kernel->has_kill;
  1115.       break;
  1116.    case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS:
  1117.       val = kernel->in.barycentric_interpolation_mode;
  1118.       break;
  1119.    case ILO_KERNEL_FS_DISPATCH_16_OFFSET:
  1120.       val = 0;
  1121.       break;
  1122.  
  1123.    default:
  1124.       assert(!"unknown kernel parameter");
  1125.       val = 0;
  1126.       break;
  1127.    }
  1128.  
  1129.    return val;
  1130. }
  1131.  
  1132. /**
  1133.  * Return the CSO of the selected kernel.
  1134.  */
  1135. const struct ilo_shader_cso *
  1136. ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
  1137. {
  1138.    const struct ilo_shader *kernel = shader->shader;
  1139.  
  1140.    assert(kernel);
  1141.  
  1142.    return &kernel->cso;
  1143. }
  1144.  
  1145. /**
  1146.  * Return the SO info of the selected kernel.
  1147.  */
  1148. const struct pipe_stream_output_info *
  1149. ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
  1150. {
  1151.    const struct ilo_shader *kernel = shader->shader;
  1152.  
  1153.    assert(kernel);
  1154.  
  1155.    return &kernel->so_info;
  1156. }
  1157.  
  1158. /**
  1159.  * Return the routing info of the selected kernel.
  1160.  */
  1161. const struct ilo_kernel_routing *
  1162. ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
  1163. {
  1164.    const struct ilo_shader *kernel = shader->shader;
  1165.  
  1166.    assert(kernel);
  1167.  
  1168.    return &kernel->routing;
  1169. }
  1170.