Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2012 Nouveau Project
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  *
  22.  * Authors: Christoph Bumiller
  23.  */
  24.  
  25. #include "nvc0_context.h"
  26. #include "nve4_compute.h"
  27.  
  28. #include "nv50/codegen/nv50_ir_driver.h"
  29.  
  30. #ifdef DEBUG
  31. static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
  32. #endif
  33.  
  34.  
/* One-time channel setup for the NVE4/NVF0 (Kepler) COMPUTE object:
 * allocates the compute class object and the kernel parameter buffer,
 * then emits the initial engine state (TEMP storage, code / TIC / TSC
 * base addresses, MS sample coordinate offsets).
 * Returns 0 on success, a negative error code otherwise.
 */
int
nve4_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   unsigned i;
   int ret;
   uint32_t obj_class;

   /* Only Kepler chips are handled by this file. */
   switch (dev->chipset & 0xf0) {
   case 0xf0:
      obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
      break;
   case 0xe0:
      obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
      return ret;
   }

   /* Buffer backing kernel input (uniforms, driver info, surface info). */
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
                        &screen->parm);
   if (ret)
      return ret;

   /* Bind the compute class on its subchannel. */
   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   /* TEMP (l[] / stack) storage; shares the tls BO with the 3D engine. */
   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   /* No idea why there are 2. Divide size by 2 to be safe.
    * Actually this might be per-MP TEMP size and looks like I'm only using
    * 2 MPs instead of all 8.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
   PUSH_DATA (push, 0xff);
   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
   PUSH_DATA (push, 0xff);

   /* Unified address space ? Who needs that ? Certainly not OpenCL.
    *
    * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
    *  accessible. We cannot prevent that at the moment, so expect failure.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
   PUSH_DATA (push, 1 << 24);
   BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
   PUSH_DATA (push, 2 << 24);

   BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   /* Undocumented method 0x0310; the value differs per compute class. */
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
   PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);

   /* NOTE: these do not affect the state used by the 3D object */
   BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
   BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   if (obj_class >= NVF0_COMPUTE_CLASS) {
      /* GK110-only initialization sequence; purpose of these raw methods
       * is unknown (NOTE(review): undocumented, confirm against rnndb).
       */
      BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
      PUSH_DATA (push, 0x100);
      BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
      for (i = 63; i >= 1; --i)
         PUSH_DATA(push, 0x38000 | i);
      IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
      IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
   }

   BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
   PUSH_DATA (push, 0); /* does not interfere with 3D */

   if (obj_class >= NVF0_COMPUTE_CLASS)
      IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);

   /* MS sample coordinate offsets: these do not work with _ALT modes ! */
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 64);
   PUSH_DATA (push, 1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
   /* (x, y) pairs for samples 0..7 */
   PUSH_DATA (push, 0); /* 0 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 1); /* 1 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0); /* 2 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 1); /* 3 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 2); /* 4 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 3); /* 5 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 2); /* 6 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 3); /* 7 */
   PUSH_DATA (push, 1);

#ifdef DEBUG
   /* Store the trap-info pointer block: addresses plus TEMP layout data
    * for the MP trap handler (see nve4_compute_trap_info).
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 28);
   PUSH_DATA (push, 1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
   PUSH_DATA (push, screen->tls->offset);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */
   PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */
   PUSH_DATA (push, 0); /* warp cfstack size */
#endif

   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);

   return 0;
}
  180.  
  181.  
/* Write surface descriptors for all dirty compute surfaces into the
 * driver parameter buffer and (re-)reference the backing resources in
 * the compute bufctx.
 */
static void
nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv50_surface *sf;
   struct nv04_resource *res;
   uint32_t mask;
   unsigned i;
   const unsigned t = 1; /* surface set index -- presumably the compute set */

   mask = nvc0->surfaces_dirty[t];
   while (mask) {
      i = ffs(mask) - 1;
      mask &= ~(1 << i);

      /*
       * NVE4's surface load/store instructions receive all the information
       * directly instead of via binding points, so we have to supply them.
       */
      BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
      PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
      BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
      PUSH_DATA (push, 64);
      PUSH_DATA (push, 1);
      BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));

      /* emits the 16 data words of the surface info record */
      nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);

      sf = nv50_surface(nvc0->surfaces[t][i]);
      if (sf) {
         res = nv04_resource(sf->base.texture);

         if (sf->base.writable)
            BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
         else
            BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
      }
   }
   /* make the uploaded records visible to subsequent kernel launches */
   if (nvc0->surfaces_dirty[t]) {
      BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
      PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
   }

   /* re-reference non-dirty surfaces */
   mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
   while (mask) {
      i = ffs(mask) - 1;
      mask &= ~(1 << i);

      sf = nv50_surface(nvc0->surfaces[t][i]);
      res = nv04_resource(sf->base.texture);

      if (sf->base.writable)
         BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
      else
         BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
   }

   nvc0->surfaces_dirty[t] = 0;
}
  245.  
  246.  
  247. /* Thankfully, textures with samplers follow the normal rules. */
  248. static void
  249. nve4_compute_validate_samplers(struct nvc0_context *nvc0)
  250. {
  251.    boolean need_flush = nve4_validate_tsc(nvc0, 5);
  252.    if (need_flush) {
  253.       BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
  254.       PUSH_DATA (nvc0->base.pushbuf, 0);
  255.    }
  256. }
  257. /* (Code duplicated at bottom for various non-convincing reasons.
  258.  *  E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
  259.  *  entries to avoid a subchannel switch.
  260.  *  Same for texture cache flushes.
  261.  *  Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
  262.  */
  263. static void nve4_compute_validate_textures(struct nvc0_context *);
  264.  
/* Upload the combined texture/sampler handles of the compute stage into
 * the input constant buffer.  One contiguous range is written, spanning
 * from the lowest to the highest dirty slot, so handle values in between
 * must already be valid in nvc0->tex_handles.
 */
static void
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint64_t address;
   const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
   unsigned i, n;
   uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];

   if (!dirty)
      return;
   i = ffs(dirty) - 1;                /* index of first dirty slot */
   n = util_logbase2(dirty) + 1 - i;  /* word count up to last dirty slot */
   assert(n);

   address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);

   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address);
   PUSH_DATA (push, address);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, n * 4);
   PUSH_DATA (push, 0x1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
   PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);

   /* flush the constant cache so kernels observe the new handles */
   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);

   nvc0->textures_dirty[s] = 0;
   nvc0->samplers_dirty[s] = 0;
}
  298.  
  299.  
  300. static boolean
  301. nve4_compute_validate_program(struct nvc0_context *nvc0)
  302. {
  303.    struct nvc0_program *prog = nvc0->compprog;
  304.  
  305.    if (prog->mem)
  306.       return TRUE;
  307.  
  308.    if (!prog->translated) {
  309.       prog->translated = nvc0_program_translate(
  310.          prog, nvc0->screen->base.device->chipset);
  311.       if (!prog->translated)
  312.          return FALSE;
  313.    }
  314.    if (unlikely(!prog->code_size))
  315.       return FALSE;
  316.  
  317.    if (likely(prog->code_size)) {
  318.       if (nvc0_program_upload_code(nvc0, prog)) {
  319.          struct nouveau_pushbuf *push = nvc0->base.pushbuf;
  320.          BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
  321.          PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CODE);
  322.          return TRUE;
  323.       }
  324.    }
  325.    return FALSE;
  326. }
  327.  
  328.  
/* Validate all dirty compute state and make the referenced buffers
 * resident.  Returns FALSE if the program could not be prepared or
 * push buffer validation failed; TRUE otherwise.
 */
static boolean
nve4_compute_state_validate(struct nvc0_context *nvc0)
{
   if (!nve4_compute_validate_program(nvc0))
      return FALSE;
   if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
      nve4_compute_validate_textures(nvc0);
   if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
      nve4_compute_validate_samplers(nvc0);
   /* handles combine TIC and TSC ids, so either kind of change
    * requires a re-upload
    */
   if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS))
       nve4_compute_set_tex_handles(nvc0);
   if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES)
      nve4_compute_validate_surfaces(nvc0);
   if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
      nvc0_validate_global_residents(nvc0,
                                     nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);

   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);

   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
   if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
      return FALSE;
   /* re-emit fences if validation caused a flush */
   if (unlikely(nvc0->state.flushed))
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);

   return TRUE;
}
  356.  
  357.  
/* Upload the user kernel parameters (if any) and the block/grid layout
 * into the input constant buffer, then flush the constant cache.
 */
static void
nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
                          const uint *block_layout,
                          const uint *grid_layout)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp->parm_size) {
      /* user parameters live at the start of the parameter buffer */
      BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, screen->parm->offset);
      PUSH_DATA (push, screen->parm->offset);
      BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
      PUSH_DATA (push, cp->parm_size);
      PUSH_DATA (push, 0x1);
      BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
      PUSH_DATAp(push, input, cp->parm_size / 4);
   }
   /* grid info: 3 block dims + 3 grid dims + 1 zero word */
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 7 * 4);
   PUSH_DATA (push, 0x1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
   PUSH_DATAp(push, block_layout, 3);
   PUSH_DATAp(push, grid_layout, 3);
   PUSH_DATA (push, 0);

   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
  393.  
  394. static INLINE uint8_t
  395. nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
  396. {
  397.    if (shared_size > (32 << 10))
  398.       return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
  399.    if (shared_size > (16 << 10))
  400.       return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
  401.    return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
  402. }
  403.  
/* Fill the hardware launch descriptor with the entry point, grid/block
 * dimensions, memory sizes and constant buffer bindings for the current
 * compute program.
 */
static void
nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
                               struct nve4_cp_launch_desc *desc,
                               uint32_t label,
                               const uint *block_layout,
                               const uint *grid_layout)
{
   const struct nvc0_screen *screen = nvc0->screen;
   const struct nvc0_program *cp = nvc0->compprog;
   unsigned i;

   nve4_cp_launch_desc_init_default(desc);

   /* code offset resolved through the program's symbol table */
   desc->entry = nvc0_program_symbol_offset(cp, label);

   desc->griddim_x = grid_layout[0];
   desc->griddim_y = grid_layout[1];
   desc->griddim_z = grid_layout[2];
   desc->blockdim_x = block_layout[0];
   desc->blockdim_y = block_layout[1];
   desc->blockdim_z = block_layout[2];

   desc->shared_size = align(cp->cp.smem_size, 0x100);
   desc->local_size_p = align(cp->cp.lmem_size, 0x10);
   desc->local_size_n = 0;
   desc->cstack_size = 0x800;
   desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);

   desc->gpr_alloc = cp->num_gprs;
   desc->bar_alloc = cp->num_barriers;

   /* user constbufs go into slots 1..7; slot 0 is the driver's
    * input/parameter buffer set below
    */
   for (i = 0; i < 7; ++i) {
      const unsigned s = 5; /* compute stage slot -- assumed, verify */
      if (nvc0->constbuf[s][i].u.buf)
         nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
   }
   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
}
  442.  
  443. static INLINE struct nve4_cp_launch_desc *
  444. nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
  445.                                struct nouveau_bo **pbo, uint64_t *pgpuaddr)
  446. {
  447.    uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);
  448.    if (!ptr)
  449.       return NULL;
  450.    if (*pgpuaddr & 255) {
  451.       unsigned adj = 256 - (*pgpuaddr & 255);
  452.       ptr += adj;
  453.       *pgpuaddr += adj;
  454.    }
  455.    return (struct nve4_cp_launch_desc *)ptr;
  456. }
  457.  
/* Gallium launch_grid entry point for NVE4: build a launch descriptor in
 * scratch memory, validate compute state, upload kernel inputs, and kick
 * off the grid.
 */
void
nve4_launch_grid(struct pipe_context *pipe,
                 const uint *block_layout, const uint *grid_layout,
                 uint32_t label,
                 const void *input)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nve4_cp_launch_desc *desc;
   uint64_t desc_gpuaddr;
   struct nouveau_bo *desc_bo;
   int ret;

   desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
   if (!desc) {
      ret = -1;
      goto out;
   }
   BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                desc_bo);

   ret = !nve4_compute_state_validate(nvc0);
   if (ret)
      goto out;

   nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
#ifdef DEBUG
   if (debug_get_num_option("NV50_PROG_DEBUG", 0))
      nve4_compute_dump_launch_desc(desc);
#endif

   nve4_compute_upload_input(nvc0, input, block_layout, grid_layout);

   /* upload descriptor and flush */
#if 0
   /* disabled alternative: copy the descriptor through the COMPUTE
    * upload methods instead of the CPU write into scratch memory
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, desc_gpuaddr);
   PUSH_DATA (push, desc_gpuaddr);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 256);
   PUSH_DATA (push, 1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
   PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
#endif
   /* hardware takes the descriptor address in units of 256 bytes */
   BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
   PUSH_DATA (push, desc_gpuaddr >> 8);
   BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
   PUSH_DATA (push, 0x3);
   BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

out:
   if (ret)
      NOUVEAU_ERR("Failed to launch grid !\n");
   /* always release the scratch slice and drop the descriptor reference */
   nouveau_scratch_done(&nvc0->base);
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
}
  518.  
  519.  
  520. #define NVE4_TIC_ENTRY_INVALID 0x000fffff
  521.  
/* Ensure TIC entries exist for all bound compute textures: upload new
 * descriptors through the COMPUTE upload path, collect the necessary
 * TIC flush / texture cache control commands, and update the packed
 * texture handles.
 */
static void
nve4_compute_validate_textures(struct nvc0_context *nvc0)
{
   struct nouveau_bo *txc = nvc0->screen->txc;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const unsigned s = 5; /* compute stage slot -- assumed, verify */
   unsigned i;
   uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
   unsigned n[2] = { 0, 0 }; /* [0]: TIC_FLUSH words, [1]: TEX_CACHE_CTL */

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);

      if (tic->id < 0) {
         /* no TIC slot yet: allocate one and upload the 32-byte entry */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         PUSH_SPACE(push, 16);
         BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
         PUSH_DATAh(push, txc->offset + (tic->id * 32));
         PUSH_DATA (push, txc->offset + (tic->id * 32));
         BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
         PUSH_DATA (push, 32);
         PUSH_DATA (push, 1);
         BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
         PUSH_DATAp(push, &tic->tic[0], 8);

         commands[0][n[0]++] = (tic->id << 4) | 1;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* contents may have changed on the GPU: request a cache flush */
         commands[1][n[1]++] = (tic->id << 4) | 1;
      }
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;

      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
   }
   /* invalidate handles of previously bound, now unused slots */
   for (; i < nvc0->state.num_textures[s]; ++i)
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;

   if (n[0]) {
      BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
      PUSH_DATAp(push, commands[0], n[0]);
   }
   if (n[1]) {
      BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
      PUSH_DATAp(push, commands[1], n[1]);
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];
}
  586.  
  587.  
  588. #ifdef DEBUG
  589. static const char *nve4_cache_split_name(unsigned value)
  590. {
  591.    switch (value) {
  592.    case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
  593.    case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
  594.    case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
  595.    default:
  596.       return "(invalid)";
  597.    }
  598. }
  599.  
/* Print a raw hex dump of the launch descriptor followed by a decoded
 * summary of its fields (debug builds only).
 */
static void
nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
{
   const uint32_t *data = (const uint32_t *)desc;
   unsigned i;
   boolean zero = FALSE; /* collapse runs of zero words into one "..." */

   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");

   for (i = 0; i < sizeof(*desc); i += 4) {
      if (data[i / 4]) {
         debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
         zero = FALSE;
      } else
      if (!zero) {
         debug_printf("...\n");
         zero = TRUE;
      }
   }

   debug_printf("entry = 0x%x\n", desc->entry);
   debug_printf("grid dimensions = %ux%ux%u\n",
                desc->griddim_x, desc->griddim_y, desc->griddim_z);
   debug_printf("block dimensions = %ux%ux%u\n",
                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
   debug_printf("s[] size: 0x%x\n", desc->shared_size);
   debug_printf("l[] size: -0x%x / +0x%x\n",
                desc->local_size_n, desc->local_size_p);
   debug_printf("stack size: 0x%x\n", desc->cstack_size);
   debug_printf("barrier count: %u\n", desc->bar_alloc);
   debug_printf("$r count: %u\n", desc->gpr_alloc);
   debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));

   /* decode the 8 constant buffer bindings */
   for (i = 0; i < 8; ++i) {
      uint64_t address;
      uint32_t size = desc->cb[i].size;
      boolean valid = !!(desc->cb_mask & (1 << i));

      address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;

      /* skip slots that are entirely unused */
      if (!valid && !address && !size)
         continue;
      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
                   i, address, size, valid ? "" : "  (invalid)");
   }
}
  646. #endif
  647.  
  648. #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
/* Dump the state recorded in the trap info block (registers, thread and
 * CTA ids) when its lock flag is set, then clear the lock.
 * Silently returns if the parameter buffer cannot be mapped.
 */
static void
nve4_compute_trap_info(struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_bo *bo = screen->parm;
   int ret, i;
   volatile struct nve4_mp_trap_info *info;
   uint8_t *map;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client);
   if (ret)
      return;
   map = (uint8_t *)bo->map;
   info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO);

   /* lock presumably set by the trap handler once the block is written */
   if (info->lock) {
      debug_printf("trapstat = %08x\n", info->trapstat);
      debug_printf("warperr = %08x\n", info->warperr);
      debug_printf("PC = %x\n", info->pc);
      debug_printf("tid = %u %u %u\n",
                   info->tid[0], info->tid[1], info->tid[2]);
      debug_printf("ctaid = %u %u %u\n",
                   info->ctaid[0], info->ctaid[1], info->ctaid[2]);
      for (i = 0; i <= 63; ++i)
         debug_printf("$r%i = %08x\n", i, info->r[i]);
      for (i = 0; i <= 6; ++i)
         debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1);
      debug_printf("$c = %x\n", info->flags >> 12);
   }
   info->lock = 0;
}
  680. #endif
  681.