Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2010 Christoph Bumiller
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "nv50/nv50_program.h"
  24. #include "nv50/nv50_context.h"
  25.  
  26. #include "codegen/nv50_ir_driver.h"
  27.  
  28. static INLINE unsigned
  29. bitcount4(const uint32_t val)
  30. {
  31.    static const uint8_t cnt[16]
  32.    = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
  33.    return cnt[val & 0xf];
  34. }
  35.  
  36. static int
  37. nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
  38. {
  39.    struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
  40.    unsigned i, n, c;
  41.  
  42.    n = 0;
  43.    for (i = 0; i < info->numInputs; ++i) {
  44.       prog->in[i].id = i;
  45.       prog->in[i].sn = info->in[i].sn;
  46.       prog->in[i].si = info->in[i].si;
  47.       prog->in[i].hw = n;
  48.       prog->in[i].mask = info->in[i].mask;
  49.  
  50.       prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
  51.  
  52.       for (c = 0; c < 4; ++c)
  53.          if (info->in[i].mask & (1 << c))
  54.             info->in[i].slot[c] = n++;
  55.  
  56.       if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
  57.          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
  58.    }
  59.    prog->in_nr = info->numInputs;
  60.  
  61.    for (i = 0; i < info->numSysVals; ++i) {
  62.       switch (info->sv[i].sn) {
  63.       case TGSI_SEMANTIC_INSTANCEID:
  64.          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
  65.          continue;
  66.       case TGSI_SEMANTIC_VERTEXID:
  67.          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
  68.          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
  69.          continue;
  70.       default:
  71.          break;
  72.       }
  73.    }
  74.  
  75.    /*
  76.     * Corner case: VP has no inputs, but we will still need to submit data to
  77.     * draw it. HW will shout at us and won't draw anything if we don't enable
  78.     * any input, so let's just pretend it's the first one.
  79.     */
  80.    if (prog->vp.attrs[0] == 0 &&
  81.        prog->vp.attrs[1] == 0 &&
  82.        prog->vp.attrs[2] == 0)
  83.       prog->vp.attrs[0] |= 0xf;
  84.  
  85.    /* VertexID before InstanceID */
  86.    if (info->io.vertexId < info->numSysVals)
  87.       info->sv[info->io.vertexId].slot[0] = n++;
  88.    if (info->io.instanceId < info->numSysVals)
  89.       info->sv[info->io.instanceId].slot[0] = n++;
  90.  
  91.    n = 0;
  92.    for (i = 0; i < info->numOutputs; ++i) {
  93.       switch (info->out[i].sn) {
  94.       case TGSI_SEMANTIC_PSIZE:
  95.          prog->vp.psiz = i;
  96.          break;
  97.       case TGSI_SEMANTIC_CLIPDIST:
  98.          prog->vp.clpd[info->out[i].si] = n;
  99.          break;
  100.       case TGSI_SEMANTIC_EDGEFLAG:
  101.          prog->vp.edgeflag = i;
  102.          break;
  103.       case TGSI_SEMANTIC_BCOLOR:
  104.          prog->vp.bfc[info->out[i].si] = i;
  105.          break;
  106.       case TGSI_SEMANTIC_LAYER:
  107.          prog->gp.has_layer = TRUE;
  108.          prog->gp.layerid = n;
  109.          break;
  110.       case TGSI_SEMANTIC_VIEWPORT_INDEX:
  111.          prog->gp.has_viewport = true;
  112.          prog->gp.viewportid = n;
  113.          break;
  114.       default:
  115.          break;
  116.       }
  117.       prog->out[i].id = i;
  118.       prog->out[i].sn = info->out[i].sn;
  119.       prog->out[i].si = info->out[i].si;
  120.       prog->out[i].hw = n;
  121.       prog->out[i].mask = info->out[i].mask;
  122.  
  123.       for (c = 0; c < 4; ++c)
  124.          if (info->out[i].mask & (1 << c))
  125.             info->out[i].slot[c] = n++;
  126.    }
  127.    prog->out_nr = info->numOutputs;
  128.    prog->max_out = n;
  129.    if (!prog->max_out)
  130.       prog->max_out = 1;
  131.  
  132.    if (prog->vp.psiz < info->numOutputs)
  133.       prog->vp.psiz = prog->out[prog->vp.psiz].hw;
  134.  
  135.    return 0;
  136. }
  137.  
  138. static int
  139. nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
  140. {
  141.    struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
  142.    unsigned i, n, m, c;
  143.    unsigned nvary;
  144.    unsigned nflat;
  145.    unsigned nintp = 0;
  146.  
  147.    /* count recorded non-flat inputs */
  148.    for (m = 0, i = 0; i < info->numInputs; ++i) {
  149.       switch (info->in[i].sn) {
  150.       case TGSI_SEMANTIC_POSITION:
  151.       case TGSI_SEMANTIC_FACE:
  152.          continue;
  153.       default:
  154.          m += info->in[i].flat ? 0 : 1;
  155.          break;
  156.       }
  157.    }
  158.    /* careful: id may be != i in info->in[prog->in[i].id] */
  159.  
  160.    /* Fill prog->in[] so that non-flat inputs are first and
  161.     * kick out special inputs that don't use the RESULT_MAP.
  162.     */
  163.    for (n = 0, i = 0; i < info->numInputs; ++i) {
  164.       if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
  165.          prog->fp.interp |= info->in[i].mask << 24;
  166.          for (c = 0; c < 4; ++c)
  167.             if (info->in[i].mask & (1 << c))
  168.                info->in[i].slot[c] = nintp++;
  169.       } else
  170.       if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
  171.          info->in[i].slot[0] = 255;
  172.       } else {
  173.          unsigned j = info->in[i].flat ? m++ : n++;
  174.  
  175.          if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
  176.             prog->vp.bfc[info->in[i].si] = j;
  177.          else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
  178.             prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
  179.  
  180.          prog->in[j].id = i;
  181.          prog->in[j].mask = info->in[i].mask;
  182.          prog->in[j].sn = info->in[i].sn;
  183.          prog->in[j].si = info->in[i].si;
  184.          prog->in[j].linear = info->in[i].linear;
  185.  
  186.          prog->in_nr++;
  187.       }
  188.    }
  189.    if (!(prog->fp.interp & (8 << 24))) {
  190.       ++nintp;
  191.       prog->fp.interp |= 8 << 24;
  192.    }
  193.  
  194.    for (i = 0; i < prog->in_nr; ++i) {
  195.       int j = prog->in[i].id;
  196.  
  197.       prog->in[i].hw = nintp;
  198.       for (c = 0; c < 4; ++c)
  199.          if (prog->in[i].mask & (1 << c))
  200.             info->in[j].slot[c] = nintp++;
  201.    }
  202.    /* (n == m) if m never increased, i.e. no flat inputs */
  203.    nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
  204.    nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
  205.    nvary = nintp - nflat;
  206.  
  207.    prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
  208.    prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
  209.  
  210.    /* put front/back colors right after HPOS */
  211.    prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
  212.    for (i = 0; i < 2; ++i)
  213.       if (prog->vp.bfc[i] < 0xff)
  214.          prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
  215.  
  216.    /* FP outputs */
  217.  
  218.    if (info->prop.fp.numColourResults > 1)
  219.       prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
  220.  
  221.    for (i = 0; i < info->numOutputs; ++i) {
  222.       prog->out[i].id = i;
  223.       prog->out[i].sn = info->out[i].sn;
  224.       prog->out[i].si = info->out[i].si;
  225.       prog->out[i].mask = info->out[i].mask;
  226.  
  227.       if (i == info->io.fragDepth || i == info->io.sampleMask)
  228.          continue;
  229.       prog->out[i].hw = info->out[i].si * 4;
  230.  
  231.       for (c = 0; c < 4; ++c)
  232.          info->out[i].slot[c] = prog->out[i].hw + c;
  233.  
  234.       prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
  235.    }
  236.  
  237.    if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) {
  238.       info->out[info->io.sampleMask].slot[0] = prog->max_out++;
  239.       prog->fp.has_samplemask = 1;
  240.    }
  241.  
  242.    if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
  243.       info->out[info->io.fragDepth].slot[2] = prog->max_out++;
  244.  
  245.    if (!prog->max_out)
  246.       prog->max_out = 4;
  247.  
  248.    return 0;
  249. }
  250.  
  251. static int
  252. nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
  253. {
  254.    switch (info->type) {
  255.    case PIPE_SHADER_VERTEX:
  256.       return nv50_vertprog_assign_slots(info);
  257.    case PIPE_SHADER_GEOMETRY:
  258.       return nv50_vertprog_assign_slots(info);
  259.    case PIPE_SHADER_FRAGMENT:
  260.       return nv50_fragprog_assign_slots(info);
  261.    default:
  262.       return -1;
  263.    }
  264. }
  265.  
  266. static struct nv50_stream_output_state *
  267. nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
  268.                                   const struct pipe_stream_output_info *pso)
  269. {
  270.    struct nv50_stream_output_state *so;
  271.    unsigned b, i, c;
  272.    unsigned base[4];
  273.  
  274.    so = MALLOC_STRUCT(nv50_stream_output_state);
  275.    if (!so)
  276.       return NULL;
  277.    memset(so->map, 0xff, sizeof(so->map));
  278.  
  279.    for (b = 0; b < 4; ++b)
  280.       so->num_attribs[b] = 0;
  281.    for (i = 0; i < pso->num_outputs; ++i) {
  282.       unsigned end =  pso->output[i].dst_offset + pso->output[i].num_components;
  283.       b = pso->output[i].output_buffer;
  284.       assert(b < 4);
  285.       so->num_attribs[b] = MAX2(so->num_attribs[b], end);
  286.    }
  287.  
  288.    so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
  289.  
  290.    so->stride[0] = pso->stride[0] * 4;
  291.    base[0] = 0;
  292.    for (b = 1; b < 4; ++b) {
  293.       assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
  294.       so->stride[b] = so->num_attribs[b] * 4;
  295.       if (so->num_attribs[b])
  296.          so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
  297.       base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
  298.    }
  299.    if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
  300.       assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
  301.       so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
  302.    }
  303.  
  304.    so->map_size = base[3] + so->num_attribs[3];
  305.  
  306.    for (i = 0; i < pso->num_outputs; ++i) {
  307.       const unsigned s = pso->output[i].start_component;
  308.       const unsigned p = pso->output[i].dst_offset;
  309.       const unsigned r = pso->output[i].register_index;
  310.       b = pso->output[i].output_buffer;
  311.  
  312.       for (c = 0; c < pso->output[i].num_components; ++c)
  313.          so->map[base[b] + p + c] = info->out[r].slot[s + c];
  314.    }
  315.  
  316.    return so;
  317. }
  318.  
  319. boolean
  320. nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
  321. {
  322.    struct nv50_ir_prog_info *info;
  323.    int ret;
  324.    const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
  325.  
  326.    info = CALLOC_STRUCT(nv50_ir_prog_info);
  327.    if (!info)
  328.       return FALSE;
  329.  
  330.    info->type = prog->type;
  331.    info->target = chipset;
  332.    info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
  333.    info->bin.source = (void *)prog->pipe.tokens;
  334.  
  335.    info->io.ucpCBSlot = 15;
  336.    info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
  337.    info->io.genUserClip = prog->vp.clpd_nr;
  338.    info->io.sampleInterp = prog->fp.sample_interp;
  339.  
  340.    info->io.resInfoCBSlot = 15;
  341.    info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
  342.    info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
  343.    info->io.msInfoCBSlot = 15;
  344.    info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
  345.  
  346.    info->assignSlots = nv50_program_assign_varying_slots;
  347.  
  348.    prog->vp.bfc[0] = 0xff;
  349.    prog->vp.bfc[1] = 0xff;
  350.    prog->vp.edgeflag = 0xff;
  351.    prog->vp.clpd[0] = map_undef;
  352.    prog->vp.clpd[1] = map_undef;
  353.    prog->vp.psiz = map_undef;
  354.    prog->gp.has_layer = 0;
  355.    prog->gp.has_viewport = 0;
  356.  
  357.    info->driverPriv = prog;
  358.  
  359. #ifdef DEBUG
  360.    info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
  361.    info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
  362. #else
  363.    info->optLevel = 3;
  364. #endif
  365.  
  366.    ret = nv50_ir_generate_code(info);
  367.    if (ret) {
  368.       NOUVEAU_ERR("shader translation failed: %i\n", ret);
  369.       goto out;
  370.    }
  371.    FREE(info->bin.syms);
  372.  
  373.    prog->code = info->bin.code;
  374.    prog->code_size = info->bin.codeSize;
  375.    prog->fixups = info->bin.relocData;
  376.    prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
  377.    prog->tls_space = info->bin.tlsSpace;
  378.  
  379.    if (prog->type == PIPE_SHADER_FRAGMENT) {
  380.       if (info->prop.fp.writesDepth) {
  381.          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
  382.          prog->fp.flags[1] = 0x11;
  383.       }
  384.       if (info->prop.fp.usesDiscard)
  385.          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
  386.    } else
  387.    if (prog->type == PIPE_SHADER_GEOMETRY) {
  388.       switch (info->prop.gp.outputPrim) {
  389.       case PIPE_PRIM_LINE_STRIP:
  390.          prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
  391.          break;
  392.       case PIPE_PRIM_TRIANGLE_STRIP:
  393.          prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
  394.          break;
  395.       case PIPE_PRIM_POINTS:
  396.       default:
  397.          assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS);
  398.          prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
  399.          break;
  400.       }
  401.       prog->gp.vert_count = info->prop.gp.maxVertices;
  402.    }
  403.  
  404.    if (prog->pipe.stream_output.num_outputs)
  405.       prog->so = nv50_program_create_strmout_state(info,
  406.                                                    &prog->pipe.stream_output);
  407.  
  408. out:
  409.    FREE(info);
  410.    return !ret;
  411. }
  412.  
  413. boolean
  414. nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
  415. {
  416.    struct nouveau_heap *heap;
  417.    int ret;
  418.    uint32_t size = align(prog->code_size, 0x40);
  419.  
  420.    switch (prog->type) {
  421.    case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
  422.    case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
  423.    case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
  424.    default:
  425.       assert(!"invalid program type");
  426.       return FALSE;
  427.    }
  428.  
  429.    ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
  430.    if (ret) {
  431.       /* Out of space: evict everything to compactify the code segment, hoping
  432.        * the working set is much smaller and drifts slowly. Improve me !
  433.        */
  434.       while (heap->next) {
  435.          struct nv50_program *evict = heap->next->priv;
  436.          if (evict)
  437.             nouveau_heap_free(&evict->mem);
  438.       }
  439.       debug_printf("WARNING: out of code space, evicting all shaders.\n");
  440.       ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
  441.       if (ret) {
  442.          NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
  443.          return FALSE;
  444.       }
  445.    }
  446.    prog->code_base = prog->mem->start;
  447.  
  448.    ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
  449.    if (ret < 0) {
  450.       nouveau_heap_free(&prog->mem);
  451.       return FALSE;
  452.    }
  453.    if (ret > 0)
  454.       nv50->state.new_tls_space = TRUE;
  455.  
  456.    if (prog->fixups)
  457.       nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
  458.  
  459.    nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
  460.                        (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
  461.                        NOUVEAU_BO_VRAM, prog->code_size, prog->code);
  462.  
  463.    BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
  464.    PUSH_DATA (nv50->base.pushbuf, 0);
  465.  
  466.    return TRUE;
  467. }
  468.  
  469. void
  470. nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
  471. {
  472.    const struct pipe_shader_state pipe = p->pipe;
  473.    const ubyte type = p->type;
  474.  
  475.    if (p->mem)
  476.       nouveau_heap_free(&p->mem);
  477.  
  478.    FREE(p->code);
  479.  
  480.    FREE(p->fixups);
  481.  
  482.    FREE(p->so);
  483.  
  484.    memset(p, 0, sizeof(*p));
  485.  
  486.    p->pipe = pipe;
  487.    p->type = type;
  488. }
  489.