Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2013 Nouveau Project
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  *
  22.  * Authors: Christoph Bumiller, Samuel Pitoiset
  23.  */
  24.  
  25. #include "nvc0/nvc0_context.h"
  26. #include "nvc0/nvc0_compute.h"
  27.  
  28. int
  29. nvc0_screen_compute_setup(struct nvc0_screen *screen,
  30.                           struct nouveau_pushbuf *push)
  31. {
  32.    struct nouveau_object *chan = screen->base.channel;
  33.    struct nouveau_device *dev = screen->base.device;
  34.    uint32_t obj_class;
  35.    int ret;
  36.    int i;
  37.  
  38.    switch (dev->chipset & ~0xf) {
  39.    case 0xc0:
  40.       if (dev->chipset == 0xc8)
  41.          obj_class = NVC8_COMPUTE_CLASS;
  42.       else
  43.          obj_class = NVC0_COMPUTE_CLASS;
  44.       break;
  45.    case 0xd0:
  46.       obj_class = NVC0_COMPUTE_CLASS;
  47.       break;
  48.    default:
  49.       NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
  50.       return -1;
  51.    }
  52.  
  53.    ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
  54.                             &screen->compute);
  55.    if (ret) {
  56.       NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
  57.       return ret;
  58.    }
  59.  
  60.    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
  61.                         &screen->parm);
  62.    if (ret)
  63.       return ret;
  64.  
  65.    BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
  66.    PUSH_DATA (push, screen->compute->oclass);
  67.  
  68.    /* hardware limit */
  69.    BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
  70.    PUSH_DATA (push, screen->mp_count);
  71.    BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
  72.    PUSH_DATA (push, 0xf);
  73.  
  74.    BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
  75.    PUSH_DATA (push, 0x8000);
  76.  
  77.    /* global memory setup */
  78.    BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
  79.    PUSH_DATA (push, 0);
  80.    BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
  81.    for (i = 0; i <= 0xff; i++)
  82.       PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
  83.    BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
  84.    PUSH_DATA (push, 1);
  85.  
  86.    /* local memory and cstack setup */
  87.    BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
  88.    PUSH_DATAh(push, screen->tls->offset);
  89.    PUSH_DATA (push, screen->tls->offset);
  90.    BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
  91.    PUSH_DATAh(push, screen->tls->size);
  92.    PUSH_DATA (push, screen->tls->size);
  93.    BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
  94.    PUSH_DATA (push, 0);
  95.    BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
  96.    PUSH_DATA (push, 1 << 24);
  97.  
  98.    /* shared memory setup */
  99.    BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
  100.    PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
  101.    BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
  102.    PUSH_DATA (push, 2 << 24);
  103.    BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
  104.    PUSH_DATA (push, 0);
  105.  
  106.    /* code segment setup */
  107.    BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
  108.    PUSH_DATAh(push, screen->text->offset);
  109.    PUSH_DATA (push, screen->text->offset);
  110.  
  111.    /* bind parameters buffer */
  112.    BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
  113.    PUSH_DATA (push, screen->parm->size);
  114.    PUSH_DATAh(push, screen->parm->offset);
  115.    PUSH_DATA (push, screen->parm->offset);
  116.    BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
  117.    PUSH_DATA (push, (0 << 8) | 1);
  118.  
  119.    /* TODO: textures & samplers */
  120.  
  121.    return 0;
  122. }
  123.  
  124. boolean
  125. nvc0_compute_validate_program(struct nvc0_context *nvc0)
  126. {
  127.    struct nvc0_program *prog = nvc0->compprog;
  128.  
  129.    if (prog->mem)
  130.       return TRUE;
  131.  
  132.    if (!prog->translated) {
  133.       prog->translated = nvc0_program_translate(
  134.          prog, nvc0->screen->base.device->chipset);
  135.       if (!prog->translated)
  136.          return FALSE;
  137.    }
  138.    if (unlikely(!prog->code_size))
  139.       return FALSE;
  140.  
  141.    if (likely(prog->code_size)) {
  142.       if (nvc0_program_upload_code(nvc0, prog)) {
  143.          struct nouveau_pushbuf *push = nvc0->base.pushbuf;
  144.          BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
  145.          PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
  146.          return TRUE;
  147.       }
  148.    }
  149.    return FALSE;
  150. }
  151.  
  152. static boolean
  153. nvc0_compute_state_validate(struct nvc0_context *nvc0)
  154. {
  155.    if (!nvc0_compute_validate_program(nvc0))
  156.       return FALSE;
  157.  
  158.    /* TODO: textures, samplers, surfaces, global memory buffers */
  159.  
  160.    nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
  161.  
  162.    nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
  163.    if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
  164.       return FALSE;
  165.    if (unlikely(nvc0->state.flushed))
  166.       nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
  167.  
  168.    return TRUE;
  169.  
  170. }
  171.  
  172. static void
  173. nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
  174. {
  175.    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
  176.    struct nvc0_screen *screen = nvc0->screen;
  177.    struct nvc0_program *cp = nvc0->compprog;
  178.  
  179.    if (cp->parm_size) {
  180.       BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
  181.       PUSH_DATA (push, align(cp->parm_size, 0x100));
  182.       PUSH_DATAh(push, screen->parm->offset);
  183.       PUSH_DATA (push, screen->parm->offset);
  184.       BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
  185.       PUSH_DATA (push, (0 << 8) | 1);
  186.       /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
  187.       BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
  188.       PUSH_DATA (push, 0);
  189.       PUSH_DATAp(push, input, cp->parm_size / 4);
  190.  
  191.       BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
  192.       PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
  193.    }
  194. }
  195.  
  196. void
  197. nvc0_launch_grid(struct pipe_context *pipe,
  198.                  const uint *block_layout, const uint *grid_layout,
  199.                  uint32_t label,
  200.                  const void *input)
  201. {
  202.    struct nvc0_context *nvc0 = nvc0_context(pipe);
  203.    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
  204.    struct nvc0_program *cp = nvc0->compprog;
  205.    unsigned s, i;
  206.    int ret;
  207.  
  208.    ret = !nvc0_compute_state_validate(nvc0);
  209.    if (ret)
  210.       goto out;
  211.  
  212.    nvc0_compute_upload_input(nvc0, input);
  213.  
  214.    BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
  215.    PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
  216.  
  217.    BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
  218.    PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
  219.    PUSH_DATA (push, 0);
  220.    PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
  221.  
  222.    BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
  223.    PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
  224.    PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
  225.    PUSH_DATA (push, cp->num_barriers);
  226.    BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
  227.    PUSH_DATA (push, cp->num_gprs);
  228.  
  229.    /* grid/block setup */
  230.    BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
  231.    PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
  232.    PUSH_DATA (push, grid_layout[2]);
  233.    BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
  234.    PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
  235.    PUSH_DATA (push, block_layout[2]);
  236.  
  237.    /* launch preliminary setup */
  238.    BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
  239.    PUSH_DATA (push, 0x1);
  240.    BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
  241.    PUSH_DATA (push, 0);
  242.    BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
  243.    PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
  244.  
  245.    /* kernel launching */
  246.    BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
  247.    PUSH_DATA (push, 0);
  248.    BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
  249.    PUSH_DATA (push, 0);
  250.    BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
  251.    PUSH_DATA (push, 0x1000);
  252.    BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
  253.    PUSH_DATA (push, 0);
  254.    BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
  255.    PUSH_DATA (push, 0x1);
  256.  
  257.    /* rebind all the 3D constant buffers
  258.     * (looks like binding a CB on COMPUTE clobbers 3D state) */
  259.    nvc0->dirty |= NVC0_NEW_CONSTBUF;
  260.    for (s = 0; s < 6; s++) {
  261.       for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
  262.          if (nvc0->constbuf[s][i].u.buf)
  263.             nvc0->constbuf_dirty[s] |= 1 << i;
  264.    }
  265.    memset(nvc0->state.uniform_buffer_bound, 0,
  266.           sizeof(nvc0->state.uniform_buffer_bound));
  267.  
  268. out:
  269.    if (ret)
  270.       NOUVEAU_ERR("Failed to launch grid !\n");
  271. }
  272.