Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2010 Christoph Bumiller
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "nv50_program.h"
  24. #include "nv50_context.h"
  25.  
  26. #include "codegen/nv50_ir_driver.h"
  27.  
  28. static INLINE unsigned
  29. bitcount4(const uint32_t val)
  30. {
  31.    static const uint8_t cnt[16]
  32.    = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
  33.    return cnt[val & 0xf];
  34. }
  35.  
  36. static int
  37. nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
  38. {
  39.    struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
  40.    unsigned i, n, c;
  41.  
  42.    n = 0;
  43.    for (i = 0; i < info->numInputs; ++i) {
  44.       prog->in[i].id = i;
  45.       prog->in[i].sn = info->in[i].sn;
  46.       prog->in[i].si = info->in[i].si;
  47.       prog->in[i].hw = n;
  48.       prog->in[i].mask = info->in[i].mask;
  49.  
  50.       prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
  51.  
  52.       for (c = 0; c < 4; ++c)
  53.          if (info->in[i].mask & (1 << c))
  54.             info->in[i].slot[c] = n++;
  55.    }
  56.    prog->in_nr = info->numInputs;
  57.  
  58.    for (i = 0; i < info->numSysVals; ++i) {
  59.       switch (info->sv[i].sn) {
  60.       case TGSI_SEMANTIC_INSTANCEID:
  61.          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
  62.          continue;
  63.       case TGSI_SEMANTIC_VERTEXID:
  64.          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
  65.          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
  66.          continue;
  67.       default:
  68.          break;
  69.       }
  70.    }
  71.  
  72.    /*
  73.     * Corner case: VP has no inputs, but we will still need to submit data to
  74.     * draw it. HW will shout at us and won't draw anything if we don't enable
  75.     * any input, so let's just pretend it's the first one.
  76.     */
  77.    if (prog->vp.attrs[0] == 0 &&
  78.        prog->vp.attrs[1] == 0 &&
  79.        prog->vp.attrs[2] == 0)
  80.       prog->vp.attrs[0] |= 0xf;
  81.  
  82.    /* VertexID before InstanceID */
  83.    if (info->io.vertexId < info->numSysVals)
  84.       info->sv[info->io.vertexId].slot[0] = n++;
  85.    if (info->io.instanceId < info->numSysVals)
  86.       info->sv[info->io.instanceId].slot[0] = n++;
  87.  
  88.    n = 0;
  89.    for (i = 0; i < info->numOutputs; ++i) {
  90.       switch (info->out[i].sn) {
  91.       case TGSI_SEMANTIC_PSIZE:
  92.          prog->vp.psiz = i;
  93.          break;
  94.       case TGSI_SEMANTIC_CLIPDIST:
  95.          prog->vp.clpd[info->out[i].si] = n;
  96.          break;
  97.       case TGSI_SEMANTIC_EDGEFLAG:
  98.          prog->vp.edgeflag = i;
  99.          break;
  100.       case TGSI_SEMANTIC_BCOLOR:
  101.          prog->vp.bfc[info->out[i].si] = i;
  102.          break;
  103.       default:
  104.          break;
  105.       }
  106.       prog->out[i].id = i;
  107.       prog->out[i].sn = info->out[i].sn;
  108.       prog->out[i].si = info->out[i].si;
  109.       prog->out[i].hw = n;
  110.       prog->out[i].mask = info->out[i].mask;
  111.  
  112.       for (c = 0; c < 4; ++c)
  113.          if (info->out[i].mask & (1 << c))
  114.             info->out[i].slot[c] = n++;
  115.    }
  116.    prog->out_nr = info->numOutputs;
  117.    prog->max_out = n;
  118.  
  119.    if (prog->vp.psiz < info->numOutputs)
  120.       prog->vp.psiz = prog->out[prog->vp.psiz].hw;
  121.  
  122.    return 0;
  123. }
  124.  
  125. static int
  126. nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
  127. {
  128.    struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
  129.    unsigned i, n, m, c;
  130.    unsigned nvary;
  131.    unsigned nflat;
  132.    unsigned nintp = 0;
  133.  
  134.    /* count recorded non-flat inputs */
  135.    for (m = 0, i = 0; i < info->numInputs; ++i) {
  136.       switch (info->in[i].sn) {
  137.       case TGSI_SEMANTIC_POSITION:
  138.       case TGSI_SEMANTIC_FACE:
  139.          continue;
  140.       default:
  141.          m += info->in[i].flat ? 0 : 1;
  142.          break;
  143.       }
  144.    }
  145.    /* careful: id may be != i in info->in[prog->in[i].id] */
  146.  
  147.    /* Fill prog->in[] so that non-flat inputs are first and
  148.     * kick out special inputs that don't use the RESULT_MAP.
  149.     */
  150.    for (n = 0, i = 0; i < info->numInputs; ++i) {
  151.       if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
  152.          prog->fp.interp |= info->in[i].mask << 24;
  153.          for (c = 0; c < 4; ++c)
  154.             if (info->in[i].mask & (1 << c))
  155.                info->in[i].slot[c] = nintp++;
  156.       } else
  157.       if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
  158.          info->in[i].slot[0] = 255;
  159.       } else {
  160.          unsigned j = info->in[i].flat ? m++ : n++;
  161.  
  162.          if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
  163.             prog->vp.bfc[info->in[i].si] = j;
  164.  
  165.          prog->in[j].id = i;
  166.          prog->in[j].mask = info->in[i].mask;
  167.          prog->in[j].sn = info->in[i].sn;
  168.          prog->in[j].si = info->in[i].si;
  169.          prog->in[j].linear = info->in[i].linear;
  170.  
  171.          prog->in_nr++;
  172.       }
  173.    }
  174.    if (!(prog->fp.interp & (8 << 24))) {
  175.       ++nintp;
  176.       prog->fp.interp |= 8 << 24;
  177.    }
  178.  
  179.    for (i = 0; i < prog->in_nr; ++i) {
  180.       int j = prog->in[i].id;
  181.  
  182.       prog->in[i].hw = nintp;
  183.       for (c = 0; c < 4; ++c)
  184.          if (prog->in[i].mask & (1 << c))
  185.             info->in[j].slot[c] = nintp++;
  186.    }
  187.    /* (n == m) if m never increased, i.e. no flat inputs */
  188.    nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
  189.    nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
  190.    nvary = nintp - nflat;
  191.  
  192.    prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
  193.    prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
  194.  
  195.    /* put front/back colors right after HPOS */
  196.    prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
  197.    for (i = 0; i < 2; ++i)
  198.       if (prog->vp.bfc[i] < 0xff)
  199.          prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
  200.  
  201.    /* FP outputs */
  202.  
  203.    if (info->prop.fp.numColourResults > 1)
  204.       prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
  205.  
  206.    for (i = 0; i < info->numOutputs; ++i) {
  207.       prog->out[i].id = i;
  208.       prog->out[i].sn = info->out[i].sn;
  209.       prog->out[i].si = info->out[i].si;
  210.       prog->out[i].mask = info->out[i].mask;
  211.  
  212.       if (i == info->io.fragDepth || i == info->io.sampleMask)
  213.          continue;
  214.       prog->out[i].hw = info->out[i].si * 4;
  215.  
  216.       for (c = 0; c < 4; ++c)
  217.          info->out[i].slot[c] = prog->out[i].hw + c;
  218.  
  219.       prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
  220.    }
  221.  
  222.    if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
  223.       info->out[info->io.sampleMask].slot[0] = prog->max_out++;
  224.  
  225.    if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
  226.       info->out[info->io.fragDepth].slot[2] = prog->max_out++;
  227.  
  228.    if (!prog->max_out)
  229.       prog->max_out = 4;
  230.  
  231.    return 0;
  232. }
  233.  
  234. static int
  235. nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
  236. {
  237.    switch (info->type) {
  238.    case PIPE_SHADER_VERTEX:
  239.       return nv50_vertprog_assign_slots(info);
  240.    case PIPE_SHADER_GEOMETRY:
  241.       return nv50_vertprog_assign_slots(info);
  242.    case PIPE_SHADER_FRAGMENT:
  243.       return nv50_fragprog_assign_slots(info);
  244.    default:
  245.       return -1;
  246.    }
  247. }
  248.  
  249. static struct nv50_stream_output_state *
  250. nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
  251.                                   const struct pipe_stream_output_info *pso)
  252. {
  253.    struct nv50_stream_output_state *so;
  254.    unsigned b, i, c;
  255.    unsigned base[4];
  256.  
  257.    so = MALLOC_STRUCT(nv50_stream_output_state);
  258.    if (!so)
  259.       return NULL;
  260.    memset(so->map, 0xff, sizeof(so->map));
  261.  
  262.    for (b = 0; b < 4; ++b)
  263.       so->num_attribs[b] = 0;
  264.    for (i = 0; i < pso->num_outputs; ++i) {
  265.       unsigned end =  pso->output[i].dst_offset + pso->output[i].num_components;
  266.       b = pso->output[i].output_buffer;
  267.       assert(b < 4);
  268.       so->num_attribs[b] = MAX2(so->num_attribs[b], end);
  269.    }
  270.  
  271.    so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
  272.  
  273.    so->stride[0] = pso->stride[0] * 4;
  274.    base[0] = 0;
  275.    for (b = 1; b < 4; ++b) {
  276.       assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
  277.       so->stride[b] = so->num_attribs[b] * 4;
  278.       if (so->num_attribs[b])
  279.          so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
  280.       base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
  281.    }
  282.    if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
  283.       assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
  284.       so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
  285.    }
  286.  
  287.    so->map_size = base[3] + so->num_attribs[3];
  288.  
  289.    for (i = 0; i < pso->num_outputs; ++i) {
  290.       const unsigned s = pso->output[i].start_component;
  291.       const unsigned p = pso->output[i].dst_offset;
  292.       const unsigned r = pso->output[i].register_index;
  293.       b = pso->output[i].output_buffer;
  294.  
  295.       for (c = 0; c < pso->output[i].num_components; ++c)
  296.          so->map[base[b] + p + c] = info->out[r].slot[s + c];
  297.    }
  298.  
  299.    return so;
  300. }
  301.  
  302. boolean
  303. nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
  304. {
  305.    struct nv50_ir_prog_info *info;
  306.    int ret;
  307.    const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
  308.  
  309.    info = CALLOC_STRUCT(nv50_ir_prog_info);
  310.    if (!info)
  311.       return FALSE;
  312.  
  313.    info->type = prog->type;
  314.    info->target = chipset;
  315.    info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
  316.    info->bin.source = (void *)prog->pipe.tokens;
  317.  
  318.    info->io.ucpCBSlot = 15;
  319.    info->io.ucpBase = 0;
  320.    info->io.genUserClip = prog->vp.clpd_nr;
  321.  
  322.    info->assignSlots = nv50_program_assign_varying_slots;
  323.  
  324.    prog->vp.bfc[0] = 0xff;
  325.    prog->vp.bfc[1] = 0xff;
  326.    prog->vp.edgeflag = 0xff;
  327.    prog->vp.clpd[0] = map_undef;
  328.    prog->vp.clpd[1] = map_undef;
  329.    prog->vp.psiz = map_undef;
  330.    prog->gp.primid = 0x80;
  331.  
  332.    info->driverPriv = prog;
  333.  
  334. #ifdef DEBUG
  335.    info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
  336.    info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
  337. #else
  338.    info->optLevel = 3;
  339. #endif
  340.  
  341.    ret = nv50_ir_generate_code(info);
  342.    if (ret) {
  343.       NOUVEAU_ERR("shader translation failed: %i\n", ret);
  344.       goto out;
  345.    }
  346.    FREE(info->bin.syms);
  347.  
  348.    prog->code = info->bin.code;
  349.    prog->code_size = info->bin.codeSize;
  350.    prog->fixups = info->bin.relocData;
  351.    prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
  352.    prog->tls_space = info->bin.tlsSpace;
  353.  
  354.    if (prog->type == PIPE_SHADER_FRAGMENT) {
  355.       if (info->prop.fp.writesDepth) {
  356.          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
  357.          prog->fp.flags[1] = 0x11;
  358.       }
  359.       if (info->prop.fp.usesDiscard)
  360.          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
  361.    }
  362.  
  363.    if (prog->pipe.stream_output.num_outputs)
  364.       prog->so = nv50_program_create_strmout_state(info,
  365.                                                    &prog->pipe.stream_output);
  366.  
  367. out:
  368.    FREE(info);
  369.    return !ret;
  370. }
  371.  
  372. boolean
  373. nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
  374. {
  375.    struct nouveau_heap *heap;
  376.    int ret;
  377.    uint32_t size = align(prog->code_size, 0x40);
  378.  
  379.    switch (prog->type) {
  380.    case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
  381.    case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
  382.    case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
  383.    default:
  384.       assert(!"invalid program type");
  385.       return FALSE;
  386.    }
  387.  
  388.    ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
  389.    if (ret) {
  390.       /* Out of space: evict everything to compactify the code segment, hoping
  391.        * the working set is much smaller and drifts slowly. Improve me !
  392.        */
  393.       while (heap->next) {
  394.          struct nv50_program *evict = heap->next->priv;
  395.          if (evict)
  396.             nouveau_heap_free(&evict->mem);
  397.       }
  398.       debug_printf("WARNING: out of code space, evicting all shaders.\n");
  399.       ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
  400.       if (ret) {
  401.          NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
  402.          return FALSE;
  403.       }
  404.    }
  405.    prog->code_base = prog->mem->start;
  406.  
  407.    ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
  408.    if (ret < 0)
  409.       return FALSE;
  410.    if (ret > 0)
  411.       nv50->state.new_tls_space = TRUE;
  412.  
  413.    if (prog->fixups)
  414.       nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
  415.  
  416.    nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
  417.                        (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
  418.                        NOUVEAU_BO_VRAM, prog->code_size, prog->code);
  419.  
  420.    BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
  421.    PUSH_DATA (nv50->base.pushbuf, 0);
  422.  
  423.    return TRUE;
  424. }
  425.  
  426. void
  427. nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
  428. {
  429.    const struct pipe_shader_state pipe = p->pipe;
  430.    const ubyte type = p->type;
  431.  
  432.    if (p->mem)
  433.       nouveau_heap_free(&p->mem);
  434.  
  435.    FREE(p->code);
  436.  
  437.    FREE(p->fixups);
  438.  
  439.    FREE(p->so);
  440.  
  441.    memset(p, 0, sizeof(*p));
  442.  
  443.    p->pipe = pipe;
  444.    p->type = type;
  445. }
  446.